702 files changed, 42007 insertions, 11251 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 40ced0ea4316..ac722dd5e694 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -51,3 +51,15 @@ Description:
 		Boolean value indicating whether the PHY device is used in
 		standalone mode, without a net_device associated, by PHYLINK.
 		Attribute created only when this is the case.
+
+What:		/sys/class/mdio_bus/<bus>/<device>/phy_dev_flags
+Date:		March 2021
+KernelVersion:	5.13
+Contact:	netdev@vger.kernel.org
+Description:
+		32-bit hexadecimal number representing a bit mask of the
+		configuration bits passed from the consumer of the PHY
+		(Ethernet MAC, switch, etc.) to the PHY driver. The flags are
+		only used internally by the kernel and their placement are
+		not meant to be stable across kernel versions. This is intended
+		for facilitating the debugging of PHY drivers.
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index f2ab8a5b6a4b..2090bfc69aa5 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -311,6 +311,17 @@ permit to distribute the load on several cpus.
 If set to 1 (default), timestamps are sampled as soon as possible, before
 queueing.
 
+netdev_unregister_timeout_secs
+------------------------------
+
+Unregister network device timeout in seconds.
+This option controls the timeout (in seconds) used to issue a warning while
+waiting for a network device refcount to drop to 0 during device
+unregistration. A lower value may be useful during bisection to detect
+a leaked reference faster. A larger value may be useful to prevent false
+warnings on slow/loaded systems.
+Default value is 10, minimum 0, maximum 3600.
+
 optmem_max
 ----------
 
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 44dc789de2b4..846354cd2d69 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -84,6 +84,7 @@ sequentially and type id is assigned to each recognized type starting from id
     #define BTF_KIND_FUNC_PROTO     13      /* Function Proto       */
     #define BTF_KIND_VAR            14      /* Variable     */
     #define BTF_KIND_DATASEC        15      /* Section      */
+    #define BTF_KIND_FLOAT          16      /* Floating point       */
 
 Note that the type section encodes debug info, not just pure types.
 ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -95,8 +96,8 @@ Each type contains the following common data::
         /* "info" bits arrangement
          * bits  0-15: vlen (e.g. # of struct's members)
          * bits 16-23: unused
-         * bits 24-27: kind (e.g. int, ptr, array...etc)
-         * bits 28-30: unused
+         * bits 24-28: kind (e.g. int, ptr, array...etc)
+         * bits 29-30: unused
          * bit     31: kind_flag, currently used by
          *             struct, union and fwd
          */
@@ -452,6 +453,18 @@ map definition.
   * ``offset``: the in-section offset of the variable
   * ``size``: the size of the variable in bytes
 
+2.2.16 BTF_KIND_FLOAT
+~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: any valid offset
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_FLOAT
+ * ``info.vlen``: 0
+ * ``size``: the size of the float type in bytes: 2, 4, 8, 12 or 16.
+
+No additional type data follow ``btf_type``.
+
 3. BTF Kernel API
 *****************
 
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 4f2874b729c3..a702f67dd45f 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -12,9 +12,6 @@ BPF instruction-set.
 The Cilium project also maintains a `BPF and XDP Reference Guide`_
 that goes into great technical depth about the BPF Architecture.
 
-The primary info for the bpf syscall is available in the `man-pages`_
-for `bpf(2)`_.
-
 BPF Type Format (BTF)
 =====================
 
@@ -35,6 +32,12 @@ Two sets of Questions and Answers (Q&A) are maintained.
    bpf_design_QA
    bpf_devel_QA
 
+Syscall API
+===========
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_. For more information about the userspace API, see
+Documentation/userspace-api/ebpf/index.rst.
 
 Helper functions
 ================
diff --git a/Documentation/devicetree/bindings/net/actions,owl-emac.yaml b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
new file mode 100644
index 000000000000..1626e0a821b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/actions,owl-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi Owl SoCs Ethernet MAC Controller
+
+maintainers:
+  - Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+
+description: |
+  This Ethernet MAC is used on the Owl family of SoCs from Actions Semi.
+  It provides the RMII and SMII interfaces and is compliant with the
+  IEEE 802.3 CSMA/CD standard, supporting both half-duplex and full-duplex
+  operation modes at 10/100 Mb/s data transfer rates.
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+properties:
+  compatible:
+    oneOf:
+      - const: actions,owl-emac
+      - items:
+          - enum:
+              - actions,s500-emac
+          - const: actions,owl-emac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 2
+
+  clock-names:
+    additionalItems: false
+    items:
+      - const: eth
+      - const: rmii
+
+  resets:
+    maxItems: 1
+
+  actions,ethcfg:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the device containing custom config.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - phy-mode
+  - phy-handle
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/actions,s500-cmu.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/reset/actions,s500-reset.h>
+
+    ethernet@b0310000 {
+        compatible = "actions,s500-emac", "actions,owl-emac";
+        reg = <0xb0310000 0x10000>;
+        interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&cmu 59 /*CLK_ETHERNET*/>, <&cmu CLK_RMII_REF>;
+        clock-names = "eth", "rmii";
+        resets = <&cmu RESET_ETHERNET>;
+        phy-mode = "rmii";
+        phy-handle = <&eth_phy>;
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            eth_phy: ethernet-phy@3 {
+                reg = <0x3>;
+                interrupt-parent = <&sirq>;
+                interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
index 79c38ea14237..2a3be0f9a1a1 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
@@ -22,10 +22,18 @@ properties:
     maxItems: 1
 
   interrupts:
-    description: RX interrupt
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: RX interrupt
+      - description: TX interrupt
 
   interrupt-names:
-    const: rx
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: rx
+      - const: tx
 
 required:
   - reg
@@ -43,6 +51,7 @@ examples:
         compatible = "brcm,bcm4908-enet";
         reg = <0x80002000 0x1000>;
 
-        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
-        interrupt-names = "rx";
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "rx", "tx";
     };
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml b/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
new file mode 100644
index 000000000000..2f34fda55fd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,bcm6368-mdio-mux.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM6368 MDIO bus multiplexer
+
+maintainers:
+  - Álvaro Fernández Rojas <noltari@gmail.com>
+
+description:
+  This MDIO bus multiplexer defines buses that could be internal as well as
+  external to SoCs. When child bus is selected, one needs to select these two
+  properties as well to generate desired MDIO transaction on appropriate bus.
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+properties:
+  compatible:
+    const: brcm,bcm6368-mdio-mux
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+patternProperties:
+  '^mdio@[0-1]$':
+    type: object
+    properties:
+      reg:
+        maxItems: 1
+
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+    required:
+      - reg
+      - "#address-cells"
+      - "#size-cells"
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mdio0: mdio@10e000b0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      compatible = "brcm,bcm6368-mdio-mux";
+      reg = <0x10e000b0 0x6>;
+
+      mdio_int: mdio@0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <0>;
+      };
+
+      mdio_ext: mdio@1 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <1>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt b/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
index 886cbe8ffb38..e3829d3e480e 100644
--- a/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
+++ b/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
@@ -5,6 +5,10 @@ Required properties for GSWIP core:
 
 - compatible	: "lantiq,xrx200-gswip" for the embedded GSWIP in the
 		  xRX200 SoC
+		  "lantiq,xrx300-gswip" for the embedded GSWIP in the
+		  xRX300 SoC
+		  "lantiq,xrx330-gswip" for the embedded GSWIP in the
+		  xRX330 SoC
 - reg		: memory range of the GSWIP core registers
 		: memory range of the GSWIP MDIO registers
 		: memory range of the GSWIP MII registers
diff --git a/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst b/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
index c3f51c672a68..fc5acd427a5d 100644
--- a/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
+++ b/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
@@ -87,11 +87,15 @@ Receive Buffer
   contain one or more packets. The number of receive sections may be changed
   via ethtool Rx ring parameters.
 
-  There is a similar send buffer which is used to aggregate packets for sending.
-  The send area is broken into chunks of 6144 bytes, each of section may
-  contain one or more packets. The send buffer is an optimization, the driver
-  will use slower method to handle very large packets or if the send buffer
-  area is exhausted.
+  There is a similar send buffer which is used to aggregate packets
+  for sending.  The send area is broken into chunks, typically of 6144
+  bytes, each of section may contain one or more packets. Small
+  packets are usually transmitted via copy to the send buffer. However,
+  if the buffer is temporarily exhausted, or the packet to be transmitted is
+  an LSO packet, the driver will provide the host with pointers to the data
+  from the SKB. This attempts to achieve a balance between the overhead of
+  data copy and the impact of remapping VM memory to be accessible by the
+  host.
 
 XDP support
 -----------
diff --git a/Documentation/networking/device_drivers/fddi/defza.rst b/Documentation/networking/device_drivers/fddi/defza.rst
index 73c2f793ea26..7393f33ea705 100644
--- a/Documentation/networking/device_drivers/fddi/defza.rst
+++ b/Documentation/networking/device_drivers/fddi/defza.rst
@@ -60,4 +60,4 @@ To do:
 
 Both success and failure reports are welcome.
 
-Maciej W. Rozycki  <macro@linux-mips.org>
+Maciej W. Rozycki  <macro@orcam.me.uk>
diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
index 0c99b11f05f9..e37f77734b5b 100644
--- a/Documentation/networking/devlink/devlink-health.rst
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -24,7 +24,7 @@ attributes of the health reporting and recovery procedures.
 
 The ``devlink`` health reporter:
 Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
+Error/Health type can be a known/generic (e.g. PCI error, fw error, rx/tx error)
 or unknown (driver specific).
 For each registered health reporter a driver can issue error/health reports
 asynchronously. All health reports handling is done by ``devlink``.
@@ -48,6 +48,7 @@ Once an error is reported, devlink health will perform the following actions:
   * Object dump is being taken and saved at the reporter instance (as long as
     there is no other dump which is already stored)
   * Auto recovery attempt is being done. Depends on:
+
     - Auto-recovery configuration
     - Grace period vs. time passed since last recover
 
@@ -72,14 +73,18 @@ via ``devlink``, e.g per error type (per health reporter):
    * - ``DEVLINK_CMD_HEALTH_REPORTER_SET``
      - Allows reporter-related configuration setting.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_RECOVER``
-     - Triggers a reporter's recovery procedure.
+     - Triggers reporter's recovery procedure.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_TEST``
+     - Triggers a fake health event on the reporter. The effects of the test
+       event in terms of recovery flow should follow closely that of a real
+       event.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE``
-     - Retrieves diagnostics data from a reporter on a device.
+     - Retrieves current device state related to the reporter.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET``
      - Retrieves the last stored dump. Devlink health
-       saves a single dump. If an dump is not already stored by the devlink
+       saves a single dump. If an dump is not already stored by devlink
        for this reporter, devlink generates a new dump.
-       dump output is defined by the reporter.
+       Dump output is defined by the reporter.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR``
      - Clears the last saved dump file for the specified reporter.
 
@@ -93,7 +98,7 @@ The following diagram provides a general overview of ``devlink-health``::
                                           +--------------------------+
                                                        |request for ops
                                                        |(diagnose,
-     mlx5_core                             devlink     |recover,
+      driver                               devlink     |recover,
                                                        |dump)
     +--------+                            +--------------------------+
     |        |                            |    reporter|             |
diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst
index 11bd5e6108c0..774f0e76c746 100644
--- a/Documentation/networking/dsa/configuration.rst
+++ b/Documentation/networking/dsa/configuration.rst
@@ -34,8 +34,15 @@ interface. The CPU port is the switch port connected to an Ethernet MAC chip.
 The corresponding linux Ethernet interface is called the master interface.
 All other corresponding linux interfaces are called slave interfaces.
 
-The slave interfaces depend on the master interface. They can only brought up,
-when the master interface is up.
+The slave interfaces depend on the master interface being up in order for them
+to send or receive traffic. Prior to kernel v5.12, the state of the master
+interface had to be managed explicitly by the user. Starting with kernel v5.12,
+the behavior is as follows:
+
+- when a DSA slave interface is brought up, the master interface is
+  automatically brought up.
+- when the master interface is brought down, all DSA slave interfaces are
+  automatically brought down.
 
 In this documentation the following Ethernet interfaces are used:
 
@@ -78,79 +85,76 @@ The tagging based configuration is desired and supported by the majority of
 DSA switches. These switches are capable to tag incoming and outgoing traffic
 without using a VLAN based configuration.
 
-single port
-~~~~~~~~~~~
-
-.. code-block:: sh
-
-  # configure each interface
-  ip addr add 192.0.2.1/30 dev lan1
-  ip addr add 192.0.2.5/30 dev lan2
-  ip addr add 192.0.2.9/30 dev lan3
-
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+*single port*
+  .. code-block:: sh
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # configure each interface
+    ip addr add 192.0.2.1/30 dev lan1
+    ip addr add 192.0.2.5/30 dev lan2
+    ip addr add 192.0.2.9/30 dev lan3
 
-bridge
-~~~~~~
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
 
-.. code-block:: sh
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+*bridge*
+  .. code-block:: sh
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
 
-  # create bridge
-  ip link add name br0 type bridge
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
+    # create bridge
+    ip link add name br0 type bridge
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
 
-  # bring up the bridge
-  ip link set dev br0 up
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-gateway
-~~~~~~~
+    # bring up the bridge
+    ip link set dev br0 up
 
-.. code-block:: sh
+*gateway*
+  .. code-block:: sh
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
 
-  # bring up the slave interfaces
-  ip link set wan up
-  ip link set lan1 up
-  ip link set lan2 up
+    # bring up the slave interfaces
+    ip link set wan up
+    ip link set lan1 up
+    ip link set lan2 up
 
-  # configure the upstream port
-  ip addr add 192.0.2.1/30 dev wan
+    # configure the upstream port
+    ip addr add 192.0.2.1/30 dev wan
 
-  # create bridge
-  ip link add name br0 type bridge
+    # create bridge
+    ip link add name br0 type bridge
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-  # bring up the bridge
-  ip link set dev br0 up
+    # bring up the bridge
+    ip link set dev br0 up
 
 .. _dsa-vlan-configuration:
 
@@ -161,132 +165,130 @@ A minority of switches are not capable to use a taging protocol
 (DSA_TAG_PROTO_NONE). These switches can be configured by a VLAN based
 configuration.
 
-single port
-~~~~~~~~~~~
-The configuration can only be set up via VLAN tagging and bridge setup.
-
-.. code-block:: sh
-
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
-  ip link add link eth0 name eth0.2 type vlan id 2
-  ip link add link eth0 name eth0.3 type vlan id 3
-
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
-  ip link set eth0.2 up
-  ip link set eth0.3 up
-
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
-
-  # create bridge
-  ip link add name br0 type bridge
-
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
-
-  # add ports to bridges
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
-
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 2 pvid untagged
-  bridge vlan add dev lan3 vid 3 pvid untagged
-
-  # configure the VLANs
-  ip addr add 192.0.2.1/30 dev eth0.1
-  ip addr add 192.0.2.5/30 dev eth0.2
-  ip addr add 192.0.2.9/30 dev eth0.3
-
-  # bring up the bridge devices
-  ip link set br0 up
-
+*single port*
+  The configuration can only be set up via VLAN tagging and bridge setup.
 
-bridge
-~~~~~~
+  .. code-block:: sh
 
-.. code-block:: sh
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
+    ip link add link eth0 name eth0.2 type vlan id 2
+    ip link add link eth0 name eth0.3 type vlan id 3
 
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
+    ip link set eth0.2 up
+    ip link set eth0.3 up
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # create bridge
+    ip link add name br0 type bridge
 
-  # create bridge
-  ip link add name br0 type bridge
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
 
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
+    # add ports to bridges
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
-  ip link set eth0.1 master br0
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 2 pvid untagged
+    bridge vlan add dev lan3 vid 3 pvid untagged
 
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 1 pvid untagged
-  bridge vlan add dev lan3 vid 1 pvid untagged
+    # configure the VLANs
+    ip addr add 192.0.2.1/30 dev eth0.1
+    ip addr add 192.0.2.5/30 dev eth0.2
+    ip addr add 192.0.2.9/30 dev eth0.3
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # bring up the bridge devices
+    ip link set br0 up
 
-  # bring up the bridge
-  ip link set dev br0 up
 
-gateway
-~~~~~~~
+*bridge*
+  .. code-block:: sh
 
-.. code-block:: sh
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
 
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
-  ip link add link eth0 name eth0.2 type vlan id 2
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
-  ip link set eth0.2 up
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # bring up the slave interfaces
-  ip link set wan up
-  ip link set lan1 up
-  ip link set lan2 up
+    # create bridge
+    ip link add name br0 type bridge
 
-  # create bridge
-  ip link add name br0 type bridge
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
 
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
+    ip link set eth0.1 master br0
 
-  # add ports to bridges
-  ip link set dev wan master br0
-  ip link set eth0.1 master br0
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 1 pvid untagged
+    bridge vlan add dev lan3 vid 1 pvid untagged
 
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 1 pvid untagged
-  bridge vlan add dev wan vid 2 pvid untagged
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-  # configure the VLANs
-  ip addr add 192.0.2.1/30 dev eth0.2
-  ip addr add 192.0.2.129/25 dev br0
+    # bring up the bridge
+    ip link set dev br0 up
 
-  # bring up the bridge devices
-  ip link set br0 up
+*gateway*
+  .. code-block:: sh
+
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
+    ip link add link eth0 name eth0.2 type vlan id 2
+
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
+    ip link set eth0.2 up
+
+    # bring up the slave interfaces
+    ip link set wan up
+    ip link set lan1 up
+    ip link set lan2 up
+
+    # create bridge
+    ip link add name br0 type bridge
+
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
+
+    # add ports to bridges
+    ip link set dev wan master br0
+    ip link set eth0.1 master br0
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 1 pvid untagged
+    bridge vlan add dev wan vid 2 pvid untagged
+
+    # configure the VLANs
+    ip addr add 192.0.2.1/30 dev eth0.2
+    ip addr add 192.0.2.129/25 dev br0
+
+    # bring up the bridge devices
+    ip link set br0 up
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index e9517af5fe02..8688009514cc 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -65,14 +65,8 @@ Note that DSA does not currently create network interfaces for the "cpu" and
 Switch tagging protocols
 ------------------------
 
-DSA currently supports 5 different tagging protocols, and a tag-less mode as
-well. The different protocols are implemented in:
-
-- ``net/dsa/tag_trailer.c``: Marvell's 4 trailer tag mode (legacy)
-- ``net/dsa/tag_dsa.c``: Marvell's original DSA tag
-- ``net/dsa/tag_edsa.c``: Marvell's enhanced DSA tag
-- ``net/dsa/tag_brcm.c``: Broadcom's 4 bytes tag
-- ``net/dsa/tag_qca.c``: Qualcomm's 2 bytes tag
+DSA supports many vendor-specific tagging protocols, one software-defined
+tagging protocol, and a tag-less mode as well (``DSA_TAG_PROTO_NONE``).
 
 The exact format of the tag protocol is vendor specific, but in general, they
 all contain something which:
@@ -80,6 +74,144 @@ all contain something which:
 - identifies which port the Ethernet frame came from/should be sent to
 - provides a reason why this frame was forwarded to the management interface
 
+All tagging protocols are in ``net/dsa/tag_*.c`` files and implement the
+methods of the ``struct dsa_device_ops`` structure, which are detailed below.
+
+Tagging protocols generally fall in one of three categories:
+
+1. The switch-specific frame header is located before the Ethernet header,
+   shifting to the right (from the perspective of the DSA master's frame
+   parser) the MAC DA, MAC SA, EtherType and the entire L2 payload.
+2. The switch-specific frame header is located before the EtherType, keeping
+   the MAC DA and MAC SA in place from the DSA master's perspective, but
+   shifting the 'real' EtherType and L2 payload to the right.
+3. The switch-specific frame header is located at the tail of the packet,
+   keeping all frame headers in place and not altering the view of the packet
+   that the DSA master's frame parser has.
+
+A tagging protocol may tag all packets with switch tags of the same length, or
+the tag length might vary (for example packets with PTP timestamps might
+require an extended switch tag, or there might be one tag length on TX and a
+different one on RX). Either way, the tagging protocol driver must populate the
+``struct dsa_device_ops::overhead`` with the length in octets of the longest
+switch frame header. The DSA framework will automatically adjust the MTU of the
+master interface to accomodate for this extra size in order for DSA user ports
+to support the standard MTU (L2 payload length) of 1500 octets. The ``overhead``
+is also used to request from the network stack, on a best-effort basis, the
+allocation of packets with a ``needed_headroom`` or ``needed_tailroom``
+sufficient such that the act of pushing the switch tag on transmission of a
+packet does not cause it to reallocate due to lack of memory.
+
+Even though applications are not expected to parse DSA-specific frame headers,
+the format on the wire of the tagging protocol represents an Application Binary
+Interface exposed by the kernel towards user space, for decoders such as
+``libpcap``. The tagging protocol driver must populate the ``proto`` member of
+``struct dsa_device_ops`` with a value that uniquely describes the
+characteristics of the interaction required between the switch hardware and the
+data path driver: the offset of each bit field within the frame header and any
+stateful processing required to deal with the frames (as may be required for
+PTP timestamping).
+
+From the perspective of the network stack, all switches within the same DSA
+switch tree use the same tagging protocol. In case of a packet transiting a
+fabric with more than one switch, the switch-specific frame header is inserted
+by the first switch in the fabric that the packet was received on. This header
+typically contains information regarding its type (whether it is a control
+frame that must be trapped to the CPU, or a data frame to be forwarded).
+Control frames should be decapsulated only by the software data path, whereas
+data frames might also be autonomously forwarded towards other user ports of
+other switches from the same fabric, and in this case, the outermost switch
+ports must decapsulate the packet.
+
+Note that in certain cases, it might be the case that the tagging format used
+by a leaf switch (not connected directly to the CPU) to not be the same as what
+the network stack sees. This can be seen with Marvell switch trees, where the
+CPU port can be configured to use either the DSA or the Ethertype DSA (EDSA)
+format, but the DSA links are configured to use the shorter (without Ethertype)
+DSA frame header, in order to reduce the autonomous packet forwarding overhead.
+It still remains the case that, if the DSA switch tree is configured for the
+EDSA tagging protocol, the operating system sees EDSA-tagged packets from the
+leaf switches that tagged them with the shorter DSA header. This can be done
+because the Marvell switch connected directly to the CPU is configured to
+perform tag translation between DSA and EDSA (which is simply the operation of
+adding or removing the ``ETH_P_EDSA`` EtherType and some padding octets).
+
+It is possible to construct cascaded setups of DSA switches even if their
+tagging protocols are not compatible with one another. In this case, there are
+no DSA links in this fabric, and each switch constitutes a disjoint DSA switch
+tree. The DSA links are viewed as simply a pair of a DSA master (the out-facing
+port of the upstream DSA switch) and a CPU port (the in-facing port of the
+downstream DSA switch).
+
+The tagging protocol of the attached DSA switch tree can be viewed through the
+``dsa/tagging`` sysfs attribute of the DSA master::
+
+    cat /sys/class/net/eth0/dsa/tagging
+
+If the hardware and driver are capable, the tagging protocol of the DSA switch
+tree can be changed at runtime. This is done by writing the new tagging
+protocol name to the same sysfs device attribute as above (the DSA master and
+all attached switch ports must be down while doing this).
+
+It is desirable that all tagging protocols are testable with the ``dsa_loop``
+mockup driver, which can be attached to any network interface. The goal is that
+any network interface should be capable of transmitting the same packet in the
+same way, and the tagger should decode the same received packet in the same way
+regardless of the driver used for the switch control path, and the driver used
+for the DSA master.
+
+The transmission of a packet goes through the tagger's ``xmit`` function.
+The passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
+``skb_mac_header(skb)``, i.e. at the destination MAC address, and the passed
+``struct net_device *dev`` represents the virtual DSA user network interface
+whose hardware counterpart the packet must be steered to (i.e. ``swp0``).
+The job of this method is to prepare the skb in a way that the switch will
+understand what egress port the packet is for (and not deliver it towards other
+ports). Typically this is fulfilled by pushing a frame header. Checking for
+insufficient size in the skb headroom or tailroom is unnecessary provided that
+the ``overhead`` and ``tail_tag`` properties were filled out properly, because
+DSA ensures there is enough space before calling this method.
+
+The reception of a packet goes through the tagger's ``rcv`` function. The
+passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
+``skb_mac_header(skb) + ETH_ALEN`` octets, i.e. to where the first octet after
+the EtherType would have been, were this frame not tagged. The role of this
+method is to consume the frame header, adjust ``skb->data`` to really point at
+the first octet after the EtherType, and to change ``skb->dev`` to point to the
+virtual DSA user network interface corresponding to the physical front-facing
+switch port that the packet was received on.
+
+Since tagging protocols in category 1 and 2 break software (and most often also
+hardware) packet dissection on the DSA master, features such as RPS (Receive
+Packet Steering) on the DSA master would be broken. The DSA framework deals
+with this by hooking into the flow dissector and shifting the offset at which
+the IP header is to be found in the tagged frame as seen by the DSA master.
+This behavior is automatic based on the ``overhead`` value of the tagging
+protocol. If not all packets are of equal size, the tagger can implement the
+``flow_dissect`` method of the ``struct dsa_device_ops`` and override this
+default behavior by specifying the correct offset incurred by each individual
+RX packet. Tail taggers do not cause issues to the flow dissector.
+
+Due to various reasons (most common being category 1 taggers being associated
+with DSA-unaware masters, mangling what the master perceives as MAC DA), the
+tagging protocol may require the DSA master to operate in promiscuous mode, to
+receive all frames regardless of the value of the MAC DA. This can be done by
+setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``.
+Note that this assumes a DSA-unaware master driver, which is the norm.
+
+Hardware manufacturers are strongly discouraged to do this, but some tagging
+protocols might not provide source port information on RX for all packets, but
+e.g. only for control traffic (link-local PDUs). In this case, by implementing
+the ``filter`` method of ``struct dsa_device_ops``, the tagger might select
+which packets are to be redirected on RX towards the virtual DSA user network
+interfaces, and which are to be left in the DSA master's RX data path.
+
+It might also happen (although silicon vendors are strongly discouraged to
+produce hardware like this) that a tagging protocol splits the switch-specific
+information into a header portion and a tail portion, therefore not falling
+cleanly into any of the above 3 categories. DSA does not support this
+configuration.
+
 Master network devices
 ----------------------
 
@@ -172,23 +304,34 @@ Graphical representation
 Summarized, this is basically how DSA looks like from a network device
 perspective::
 
-
-                |---------------------------
-                | CPU network device (eth0)|
-                ----------------------------
-                | <tag added by switch     |
-                |                          |
-                |                          |
-                |        tag added by CPU> |
-        |--------------------------------------------|
-        |            Switch driver                   |
-        |--------------------------------------------|
-                  ||        ||         ||
-              |-------|  |-------|  |-------|
-              | sw0p0 |  | sw0p1 |  | sw0p2 |
-              |-------|  |-------|  |-------|
-
-
+                Unaware application
+              opens and binds socket
+                       |  ^
+                       |  |
+           +-----------v--|--------------------+
+           |+------+ +------+ +------+ +------+|
+           || swp0 | | swp1 | | swp2 | | swp3 ||
+           |+------+-+------+-+------+-+------+|
+           |          DSA switch driver        |
+           +-----------------------------------+
+                         |        ^
+            Tag added by |        | Tag consumed by
+           switch driver |        | switch driver
+                         v        |
+           +-----------------------------------+
+           | Unmodified host interface driver  | Software
+   --------+-----------------------------------+------------
+           |       Host interface (eth0)       | Hardware
+           +-----------------------------------+
+                         |        ^
+         Tag consumed by |        | Tag added by
+         switch hardware |        | switch hardware
+                         v        |
+           +-----------------------------------+
+           |               Switch              |
+           |+------+ +------+ +------+ +------+|
+           || swp0 | | swp1 | | swp2 | | swp3 ||
+           ++------+-+------+-+------+-+------++
 
 Slave MDIO bus
 --------------
@@ -239,14 +382,6 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as
 Design limitations
 ==================
 
-Limits on the number of devices and ports
------------------------------------------
-
-DSA currently limits the number of maximum switches within a tree to 4
-(``DSA_MAX_SWITCHES``), and the number of ports per switch to 12 (``DSA_MAX_PORTS``).
-These limits could be extended to support larger configurations would this need
-arise.
-
 Lack of CPU/DSA network devices
 -------------------------------
 
@@ -281,6 +416,7 @@ DSA currently leverages the following subsystems:
 - MDIO/PHY library: ``drivers/net/phy/phy.c``, ``mdio_bus.c``
 - Switchdev:``net/switchdev/*``
 - Device Tree for various of_* functions
+- Devlink: ``net/core/devlink.c``
 
 MDIO/PHY library
 ----------------
@@ -317,14 +453,39 @@ SWITCHDEV
 
 DSA directly utilizes SWITCHDEV when interfacing with the bridge layer, and
 more specifically with its VLAN filtering portion when configuring VLANs on top
-of per-port slave network devices. Since DSA primarily deals with
-MDIO-connected switches, although not exclusively, SWITCHDEV's
-prepare/abort/commit phases are often simplified into a prepare phase which
-checks whether the operation is supported by the DSA switch driver, and a commit
-phase which applies the changes.
-
-As of today, the only SWITCHDEV objects supported by DSA are the FDB and VLAN
-objects.
+of per-port slave network devices. As of today, the only SWITCHDEV objects
+supported by DSA are the FDB and VLAN objects.
+
+Devlink
+-------
+
+DSA registers one devlink device per physical switch in the fabric.
+For each devlink device, every physical port (i.e. user ports, CPU ports, DSA
+links or unused ports) is exposed as a devlink port.
+
+DSA drivers can make use of the following devlink features:
+
+- Regions: debugging feature which allows user space to dump driver-defined
+  areas of hardware information in a low-level, binary format. Both global
+  regions as well as per-port regions are supported. It is possible to export
+  devlink regions even for pieces of data that are already exposed in some way
+  to the standard iproute2 user space programs (ip-link, bridge), like address
+  tables and VLAN tables. For example, this might be useful if the tables
+  contain additional hardware-specific details which are not visible through
+  the iproute2 abstraction, or it might be useful to inspect these tables on
+  the non-user ports too, which are invisible to iproute2 because no network
+  interface is registered for them.
+- Params: a feature which enables user to configure certain low-level tunable
+  knobs pertaining to the device. Drivers may implement applicable generic
+  devlink params, or may add new device-specific devlink params.
+- Resources: a monitoring feature which enables users to see the degree of
+  utilization of certain hardware tables in the device, such as FDB, VLAN, etc.
+- Shared buffers: a QoS feature for adjusting and partitioning memory and frame
+  reservations per port and per traffic class, in the ingress and egress
+  directions, such that low-priority bulk traffic does not impede the
+  processing of high-priority critical traffic.
+
+For more details, consult ``Documentation/networking/devlink/``.
 
 Device Tree
 -----------
@@ -490,6 +651,17 @@ Bridge layer
   computing a STP state change based on current and asked parameters and perform
   the relevant ageing based on the intersection results
 
+- ``port_bridge_flags``: bridge layer function invoked when a port must
+  configure its settings for e.g. flooding of unknown traffic or source address
+  learning. The switch driver is responsible for initial setup of the
+  standalone ports with address learning disabled and egress flooding of all
+  types of traffic, then the DSA core notifies of any change to the bridge port
+  flags when the port joins and leaves a bridge. DSA does not currently manage
+  the bridge port flags for the CPU port. The assumption is that address
+  learning should be statically enabled (if supported by the hardware) on the
+  CPU port, and flooding towards the CPU port should also be enabled, due to a
+  lack of an explicit address filtering mechanism in the DSA core.
+
 Bridge VLAN filtering
 ---------------------
 
@@ -503,14 +675,10 @@ Bridge VLAN filtering
   accept any 802.1Q frames irrespective of their VLAN ID, and untagged frames are
   allowed.
 
-- ``port_vlan_prepare``: bridge layer function invoked when the bridge prepares the
-  configuration of a VLAN on the given port. If the operation is not supported
-  by the hardware, this function should return ``-EOPNOTSUPP`` to inform the bridge
-  code to fallback to a software implementation. No hardware setup must be done
-  in this function. See port_vlan_add for this and details.
-
 - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
-  (tagged or untagged) for the given switch port
+  (tagged or untagged) for the given switch port. If the operation is not
+  supported by the hardware, this function should return ``-EOPNOTSUPP`` to
+  inform the bridge code to fallback to a software implementation.
 
 - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
   given switch port
@@ -538,14 +706,10 @@ Bridge VLAN filtering
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and FDB info.
 
-- ``port_mdb_prepare``: bridge layer function invoked when the bridge prepares the
-  installation of a multicast database entry. If the operation is not supported,
-  this function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback
-  to a software implementation. No hardware setup must be done in this function.
-  See ``port_fdb_add`` for this and details.
-
 - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
-  a multicast database entry, the switch hardware should be programmed with the
+  a multicast database entry. If the operation is not supported, this function
+  should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
+  software implementation. The switch hardware should be programmed with the
   specified address in the specified VLAN ID in the forwarding database
   associated with this VLAN ID.
 
@@ -561,6 +725,101 @@ Bridge VLAN filtering
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and MDB info.
 
+Link aggregation
+----------------
+
+Link aggregation is implemented in the Linux networking stack by the bonding
+and team drivers, which are modeled as virtual, stackable network interfaces.
+DSA is capable of offloading a link aggregation group (LAG) to hardware that
+supports the feature, and supports bridging between physical ports and LAGs,
+as well as between LAGs. A bonding/team interface which holds multiple physical
+ports constitutes a logical port, although DSA has no explicit concept of a
+logical port at the moment. Due to this, events where a LAG joins/leaves a
+bridge are treated as if all individual physical ports that are members of that
+LAG join/leave the bridge. Switchdev port attributes (VLAN filtering, STP
+state, etc) and objects (VLANs, MDB entries) offloaded to a LAG as bridge port
+are treated similarly: DSA offloads the same switchdev object / port attribute
+on all members of the LAG. Static bridge FDB entries on a LAG are not yet
+supported, since the DSA driver API does not have the concept of a logical port
+ID.
+
+- ``port_lag_join``: function invoked when a given switch port is added to a
+  LAG. The driver may return ``-EOPNOTSUPP``, and in this case, DSA will fall
+  back to a software implementation where all traffic from this port is sent to
+  the CPU.
+- ``port_lag_leave``: function invoked when a given switch port leaves a LAG
+  and returns to operation as a standalone port.
+- ``port_lag_change``: function invoked when the link state of any member of
+  the LAG changes, and the hashing function needs rebalancing to only make use
+  of the subset of physical LAG member ports that are up.
+
+Drivers that benefit from having an ID associated with each offloaded LAG
+can optionally populate ``ds->num_lag_ids`` from the ``dsa_switch_ops::setup``
+method. The LAG ID associated with a bonding/team interface can then be
+retrieved by a DSA switch driver using the ``dsa_lag_id`` function.
+
+IEC 62439-2 (MRP)
+-----------------
+
+The Media Redundancy Protocol is a topology management protocol optimized for
+fast fault recovery time for ring networks, which has some components
+implemented as a function of the bridge driver. MRP uses management PDUs
+(Test, Topology, LinkDown/Up, Option) sent at a multicast destination MAC
+address range of 01:15:4e:00:00:0x and with an EtherType of 0x88e3.
+Depending on the node's role in the ring (MRM: Media Redundancy Manager,
+MRC: Media Redundancy Client, MRA: Media Redundancy Automanager), certain MRP
+PDUs might need to be terminated locally and others might need to be forwarded.
+An MRM might also benefit from offloading to hardware the creation and
+transmission of certain MRP PDUs (Test).
+
+Normally an MRP instance can be created on top of any network interface,
+however in the case of a device with an offloaded data path such as DSA, it is
+necessary for the hardware, even if it is not MRP-aware, to be able to extract
+the MRP PDUs from the fabric before the driver can proceed with the software
+implementation. DSA today has no driver which is MRP-aware, therefore it only
+listens for the bare minimum switchdev objects required for the software assist
+to work properly. The operations are detailed below.
+
+- ``port_mrp_add`` and ``port_mrp_del``: notifies driver when an MRP instance
+  with a certain ring ID, priority, primary port and secondary port is
+  created/deleted.
+- ``port_mrp_add_ring_role`` and ``port_mrp_del_ring_role``: function invoked
+  when an MRP instance changes ring roles between MRM or MRC. This affects
+  which MRP PDUs should be trapped to software and which should be autonomously
+  forwarded.
+
+IEC 62439-3 (HSR/PRP)
+---------------------
+
+The Parallel Redundancy Protocol (PRP) is a network redundancy protocol which
+works by duplicating and sequence numbering packets through two independent L2
+networks (which are unaware of the PRP tail tags carried in the packets), and
+eliminating the duplicates at the receiver. The High-availability Seamless
+Redundancy (HSR) protocol is similar in concept, except all nodes that carry
+the redundant traffic are aware of the fact that it is HSR-tagged (because HSR
+uses a header with an EtherType of 0x892f) and are physically connected in a
+ring topology. Both HSR and PRP use supervision frames for monitoring the
+health of the network and for discovery of other nodes.
+
+In Linux, both HSR and PRP are implemented in the hsr driver, which
+instantiates a virtual, stackable network interface with two member ports.
+The driver only implements the basic roles of DANH (Doubly Attached Node
+implementing HSR) and DANP (Doubly Attached Node implementing PRP); the roles
+of RedBox and QuadBox are not implemented (therefore, bridging a hsr network
+interface with a physical switch port does not produce the expected result).
+
+A driver which is able of offloading certain functions of a DANP or DANH should
+declare the corresponding netdev features as indicated by the documentation at
+``Documentation/networking/netdev-features.rst``. Additionally, the following
+methods must be implemented:
+
+- ``port_hsr_join``: function invoked when a given switch port is added to a
+  DANP/DANH. The driver may return ``-EOPNOTSUPP`` and in this case, DSA will
+  fall back to a software implementation where all traffic from this port is
+  sent to the CPU.
+- ``port_hsr_leave``: function invoked when a given switch port leaves a
+  DANP/DANH and returns to normal operation as a standalone port.
+
 TODO
 ====
 
@@ -576,8 +835,5 @@ two subsystems and get the best of both worlds.
 Other hanging fruits
 --------------------
 
-- making the number of ports fully dynamic and not dependent on ``DSA_MAX_PORTS``
 - allowing more than one CPU/management interface:
   http://comments.gmane.org/gmane.linux.network/365657
-- porting more drivers from other vendors:
-  http://comments.gmane.org/gmane.linux.network/365510
diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst
index 6cdf9a1724b6..d87f253b9d39 100644
--- a/Documentation/networking/nf_flowtable.rst
+++ b/Documentation/networking/nf_flowtable.rst
@@ -4,35 +4,38 @@
 Netfilter's flowtable infrastructure
 ====================================
 
-This documentation describes the software flowtable infrastructure available in
-Netfilter since Linux kernel 4.16.
+This documentation describes the Netfilter flowtable infrastructure which allows
+you to define a fastpath through the flowtable datapath. This infrastructure
+also provides hardware offload support. The flowtable supports for the layer 3
+IPv4 and IPv6 and the layer 4 TCP and UDP protocols.
 
 Overview
 --------
 
-Initial packets follow the classic forwarding path, once the flow enters the
-established state according to the conntrack semantics (ie. we have seen traffic
-in both directions), then you can decide to offload the flow to the flowtable
-from the forward chain via the 'flow offload' action available in nftables.
+Once the first packet of the flow successfully goes through the IP forwarding
+path, from the second packet on, you might decide to offload the flow to the
+flowtable through your ruleset. The flowtable infrastructure provides a rule
+action that allows you to specify when to add a flow to the flowtable.
 
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
-path (the visible effect is that you do not see these packets from any of the
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
-follows the classic forward path.
+A packet that finds a matching entry in the flowtable (ie. flowtable hit) is
+transmitted to the output netdevice via neigh_xmit(), hence, packets bypass the
+classic IP forwarding path (the visible effect is that you do not see these
+packets from any of the Netfilter hooks coming after ingress). In case that
+there is no matching entry in the flowtable (ie. flowtable miss), the packet
+follows the classic IP forwarding path.
 
-The flowtable uses a resizable hashtable, lookups are based on the following
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
-and destination ports and the input interface (useful in case there are several
-conntrack zones in place).
+The flowtable uses a resizable hashtable. Lookups are based on the following
+n-tuple selectors: layer 2 protocol encapsulation (VLAN and PPPoE), layer 3
+source and destination, layer 4 source and destination ports and the input
+interface (useful in case there are several conntrack zones in place).
 
-Flowtables are populated via the 'flow offload' nftables action, so the user can
-selectively specify what flows are placed into the flow table. Hence, packets
-follow the classic forwarding path unless the user explicitly instruct packets
-to use this new alternative forwarding path via nftables policy.
+The 'flow add' action allows you to populate the flowtable, the user selectively
+specifies what flows are placed into the flowtable. Hence, packets follow the
+classic IP forwarding path unless the user explicitly instruct flows to use this
+new alternative forwarding path via policy.
 
-This is represented in Fig.1, which describes the classic forwarding path
-including the Netfilter hooks and the flowtable fastpath bypass.
+The flowtable datapath is represented in Fig.1, which describes the classic IP
+forwarding path including the Netfilter hooks and the flowtable fastpath bypass.
 
 ::
 
@@ -67,11 +70,13 @@ including the Netfilter hooks and the flowtable fastpath bypass.
 	       Fig.1 Netfilter hooks and flowtable interactions
 
 The flowtable entry also stores the NAT configuration, so all packets are
-mangled according to the NAT policy that matches the initial packets that went
-through the classic forwarding path. The TTL is decremented before calling
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
-path given that the transport selectors are missing, therefore flowtable lookup
-is not possible.
+mangled according to the NAT policy that is specified from the classic IP
+forwarding path. The TTL is decremented before calling neigh_xmit(). Fragmented
+traffic is passed up to follow the classic IP forwarding path given that the
+transport header is missing, in this case, flowtable lookups are not possible.
+TCP RST and FIN packets are also passed up to the classic IP forwarding path to
+release the flow gracefully. Packets that exceed the MTU are also passed up to
+the classic forwarding path to report packet-too-big ICMP errors to the sender.
 
 Example configuration
 ---------------------
@@ -85,7 +90,7 @@ flowtable and add one rule to your forward chain::
 		}
 		chain y {
 			type filter hook forward priority 0; policy accept;
-			ip protocol tcp flow offload @f
+			ip protocol tcp flow add @f
 			counter packets 0 bytes 0
 		}
 	}
@@ -103,6 +108,117 @@ flow is offloaded, you will observe that the counter rule in the example above
 does not get updated for the packets that are being forwarded through the
 forwarding bypass.
 
+You can identify offloaded flows through the [OFFLOAD] tag when listing your
+connection tracking table.
+
+::
+	# conntrack -L
+	tcp      6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
+
+
+Layer 2 encapsulation
+---------------------
+
+Since Linux kernel 5.13, the flowtable infrastructure discovers the real
+netdevice behind VLAN and PPPoE netdevices. The flowtable software datapath
+parses the VLAN and PPPoE layer 2 headers to extract the ethertype and the
+VLAN ID / PPPoE session ID which are used for the flowtable lookups. The
+flowtable datapath also deals with layer 2 decapsulation.
+
+You do not need to add the PPPoE and the VLAN devices to your flowtable,
+instead the real device is sufficient for the flowtable to track your flows.
+
+Bridge and IP forwarding
+------------------------
+
+Since Linux kernel 5.13, you can add bridge ports to the flowtable. The
+flowtable infrastructure discovers the topology behind the bridge device. This
+allows the flowtable to define a fastpath bypass between the bridge ports
+(represented as eth1 and eth2 in the example figure below) and the gateway
+device (represented as eth0) in your switch/router.
+
+::
+                      fastpath bypass
+               .-------------------------.
+              /                           \
+              |           IP forwarding   |
+              |          /             \ \/
+              |       br0               eth0 ..... eth0
+              .       / \                          *host B*
+               -> eth1  eth2
+                   .           *switch/router*
+                   .
+                   .
+                 eth0
+               *host A*
+
+The flowtable infrastructure also supports for bridge VLAN filtering actions
+such as PVID and untagged. You can also stack a classic VLAN device on top of
+your bridge port.
+
+If you would like that your flowtable defines a fastpath between your bridge
+ports and your IP forwarding path, you have to add your bridge ports (as
+represented by the real netdevice) to your flowtable definition.
+
+Counters
+--------
+
+The flowtable can synchronize packet and byte counters with the existing
+connection tracking entry by specifying the counter statement in your flowtable
+definition, e.g.
+
+::
+	table inet x {
+		flowtable f {
+			hook ingress priority 0; devices = { eth0, eth1 };
+			counter
+		}
+		...
+	}
+
+Counter support is available since Linux kernel 5.7.
+
+Hardware offload
+----------------
+
+If your network device provides hardware offload support, you can turn it on by
+means of the 'offload' flag in your flowtable definition, e.g.
+
+::
+	table inet x {
+		flowtable f {
+			hook ingress priority 0; devices = { eth0, eth1 };
+			flags offload;
+		}
+		...
+	}
+
+There is a workqueue that adds the flows to the hardware. Note that a few
+packets might still run over the flowtable software path until the workqueue has
+a chance to offload the flow to the network device.
+
+You can identify hardware offloaded flows through the [HW_OFFLOAD] tag when
+listing your connection tracking table. Please, note that the [OFFLOAD] tag
+refers to the software offload mode, so there is a distinction between [OFFLOAD]
+which refers to the software flowtable fastpath and [HW_OFFLOAD] which refers
+to the hardware offload datapath being used by the flow.
+
+The flowtable hardware offload infrastructure also supports for the DSA
+(Distributed Switch Architecture).
+
+Limitations
+-----------
+
+The flowtable behaves like a cache. The flowtable entries might get stale if
+either the destination MAC address or the egress netdevice that is used for
+transmission changes.
+
+This might be a problem if:
+
+- You run the flowtable in software mode and you combine bridge and IP
+  forwarding in your setup.
+- Hardware offload is enabled.
+
 More reading
 ------------
 
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 06adfc2afcf0..3f05d50ecd6e 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -80,8 +80,8 @@ values of phy_interface_t must be understood from the perspective of the PHY
 device itself, leading to the following:
 
 * PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
-  internal delay by itself, it assumes that either the Ethernet MAC (if capable
-  or the PCB traces) insert the correct 1.5-2ns delay
+  internal delay by itself, it assumes that either the Ethernet MAC (if capable)
+  or the PCB traces insert the correct 1.5-2ns delay
 
 * PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
   for the transmit data lines (TXD[3:0]) processed by the PHY device
diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
index ddc3f35775dc..f1f4e6a85a29 100644
--- a/Documentation/networking/switchdev.rst
+++ b/Documentation/networking/switchdev.rst
@@ -181,18 +181,41 @@ To offloading L2 bridging, the switchdev driver/device should support:
 Static FDB Entries
 ^^^^^^^^^^^^^^^^^^
 
-The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump
-to support static FDB entries installed to the device.  Static bridge FDB
-entries are installed, for example, using iproute2 bridge cmd::
-
-	bridge fdb add ADDR dev DEV [vlan VID] [self]
-
-The driver should use the helper switchdev_port_fdb_xxx ops for ndo_fdb_xxx
-ops, and handle add/delete/dump of SWITCHDEV_OBJ_ID_PORT_FDB object using
-switchdev_port_obj_xxx ops.
-
-XXX: what should be done if offloading this rule to hardware fails (for
-example, due to full capacity in hardware tables) ?
+A driver which implements the ``ndo_fdb_add``, ``ndo_fdb_del`` and
+``ndo_fdb_dump`` operations is able to support the command below, which adds a
+static bridge FDB entry::
+
+        bridge fdb add dev DEV ADDRESS [vlan VID] [self] static
+
+(the "static" keyword is non-optional: if not specified, the entry defaults to
+being "local", which means that it should not be forwarded)
+
+The "self" keyword (optional because it is implicit) has the role of
+instructing the kernel to fulfill the operation through the ``ndo_fdb_add``
+implementation of the ``DEV`` device itself. If ``DEV`` is a bridge port, this
+will bypass the bridge and therefore leave the software database out of sync
+with the hardware one.
+
+To avoid this, the "master" keyword can be used::
+
+        bridge fdb add dev DEV ADDRESS [vlan VID] master static
+
+The above command instructs the kernel to search for a master interface of
+``DEV`` and fulfill the operation through the ``ndo_fdb_add`` method of that.
+This time, the bridge generates a ``SWITCHDEV_FDB_ADD_TO_DEVICE`` notification
+which the port driver can handle and use it to program its hardware table. This
+way, the software and the hardware database will both contain this static FDB
+entry.
+
+Note: for new switchdev drivers that offload the Linux bridge, implementing the
+``ndo_fdb_add`` and ``ndo_fdb_del`` bridge bypass methods is strongly
+discouraged: all static FDB entries should be added on a bridge port using the
+"master" flag. The ``ndo_fdb_dump`` is an exception and can be implemented to
+visualize the hardware tables, if the device does not have an interrupt for
+notifying the operating system of newly learned/forgotten dynamic FDB
+addresses. In that case, the hardware FDB might end up having entries that the
+software FDB does not, and implementing ``ndo_fdb_dump`` is the only way to see
+them.
 
 Note: by default, the bridge does not filter on VLAN and only bridges untagged
 traffic.  To enable VLAN support, turn on VLAN filtering::
@@ -385,3 +408,156 @@ The driver can monitor for updates to arp_tbl using the netevent notifier
 NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
 for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
 to know when arp_tbl neighbor entries are purged from the port.
+
+Device driver expected behavior
+-------------------------------
+
+Below is a set of defined behavior that switchdev enabled network devices must
+adhere to.
+
+Configuration-less state
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Upon driver bring up, the network devices must be fully operational, and the
+backing driver must configure the network device such that it is possible to
+send and receive traffic to this network device and it is properly separated
+from other network devices/ports (e.g.: as is frequent with a switch ASIC). How
+this is achieved is heavily hardware dependent, but a simple solution can be to
+use per-port VLAN identifiers unless a better mechanism is available
+(proprietary metadata for each network port for instance).
+
+The network device must be capable of running a full IP protocol stack
+including multicast, DHCP, IPv4/6, etc. If necessary, it should program the
+appropriate filters for VLAN, multicast, unicast etc. The underlying device
+driver must effectively be configured in a similar fashion to what it would do
+when IGMP snooping is enabled for IP multicast over these switchdev network
+devices and unsolicited multicast must be filtered as early as possible in
+the hardware.
+
+When configuring VLANs on top of the network device, all VLANs must be working,
+irrespective of the state of other network devices (e.g.: other ports being part
+of a VLAN-aware bridge doing ingress VID checking). See below for details.
+
+If the device implements e.g.: VLAN filtering, putting the interface in
+promiscuous mode should allow the reception of all VLAN tags (including those
+not present in the filter(s)).
+
+Bridged switch ports
+^^^^^^^^^^^^^^^^^^^^
+
+When a switchdev enabled network device is added as a bridge member, it should
+not disrupt any functionality of non-bridged network devices and they
+should continue to behave as normal network devices. Depending on the bridge
+configuration knobs below, the expected behavior is documented.
+
+Bridge VLAN filtering
+^^^^^^^^^^^^^^^^^^^^^
+
+The Linux bridge allows the configuration of a VLAN filtering mode (statically,
+at device creation time, and dynamically, during run time) which must be
+observed by the underlying switchdev network device/hardware:
+
+- with VLAN filtering turned off: the bridge is strictly VLAN unaware and its
+  data path will process all Ethernet frames as if they are VLAN-untagged.
+  The bridge VLAN database can still be modified, but the modifications should
+  have no effect while VLAN filtering is turned off. Frames ingressing the
+  device with a VID that is not programmed into the bridge/switch's VLAN table
+  must be forwarded and may be processed using a VLAN device (see below).
+
+- with VLAN filtering turned on: the bridge is VLAN-aware and frames ingressing
+  the device with a VID that is not programmed into the bridges/switch's VLAN
+  table must be dropped (strict VID checking).
+
+When there is a VLAN device (e.g: sw0p1.100) configured on top of a switchdev
+network device which is a bridge port member, the behavior of the software
+network stack must be preserved, or the configuration must be refused if that
+is not possible.
+
+- with VLAN filtering turned off, the bridge will process all ingress traffic
+  for the port, except for the traffic tagged with a VLAN ID destined for a
+  VLAN upper. The VLAN upper interface (which consumes the VLAN tag) can even
+  be added to a second bridge, which includes other switch ports or software
+  interfaces. Some approaches to ensure that the forwarding domain for traffic
+  belonging to the VLAN upper interfaces are managed properly:
+
+    * If forwarding destinations can be managed per VLAN, the hardware could be
+      configured to map all traffic, except the packets tagged with a VID
+      belonging to a VLAN upper interface, to an internal VID corresponding to
+      untagged packets. This internal VID spans all ports of the VLAN-unaware
+      bridge. The VID corresponding to the VLAN upper interface spans the
+      physical port of that VLAN interface, as well as the other ports that
+      might be bridged with it.
+    * Treat bridge ports with VLAN upper interfaces as standalone, and let
+      forwarding be handled in the software data path.
+
+- with VLAN filtering turned on, these VLAN devices can be created as long as
+  the bridge does not have an existing VLAN entry with the same VID on any
+  bridge port. These VLAN devices cannot be enslaved into the bridge since they
+  duplicate functionality/use case with the bridge's VLAN data path processing.
+
+Non-bridged network ports of the same switch fabric must not be disturbed in any
+way by the enabling of VLAN filtering on the bridge device(s). If the VLAN
+filtering setting is global to the entire chip, then the standalone ports
+should indicate to the network stack that VLAN filtering is required by setting
+'rx-vlan-filter: on [fixed]' in the ethtool features.
+
+Because VLAN filtering can be turned on/off at runtime, the switchdev driver
+must be able to reconfigure the underlying hardware on the fly to honor the
+toggling of that option and behave appropriately. If that is not possible, the
+switchdev driver can also refuse to support dynamic toggling of the VLAN
+filtering knob at runtime and require a destruction of the bridge device(s) and
+creation of new bridge device(s) with a different VLAN filtering value to
+ensure VLAN awareness is pushed down to the hardware.
+
+Even when VLAN filtering in the bridge is turned off, the underlying switch
+hardware and driver may still configure itself in a VLAN-aware mode provided
+that the behavior described above is observed.
+
+The VLAN protocol of the bridge plays a role in deciding whether a packet is
+treated as tagged or not: a bridge using the 802.1ad protocol must treat both
+VLAN-untagged packets, as well as packets tagged with 802.1Q headers, as
+untagged.
+
+The 802.1p (VID 0) tagged packets must be treated in the same way by the device
+as untagged packets, since the bridge device does not allow the manipulation of
+VID 0 in its database.
+
+When the bridge has VLAN filtering enabled and a PVID is not configured on the
+ingress port, untagged and 802.1p tagged packets must be dropped. When the bridge
+has VLAN filtering enabled and a PVID exists on the ingress port, untagged and
+priority-tagged packets must be accepted and forwarded according to the
+bridge's port membership of the PVID VLAN. When the bridge has VLAN filtering
+disabled, the presence/lack of a PVID should not influence the packet
+forwarding decision.
+
+Bridge IGMP snooping
+^^^^^^^^^^^^^^^^^^^^
+
+The Linux bridge allows the configuration of IGMP snooping (statically, at
+interface creation time, or dynamically, during runtime) which must be observed
+by the underlying switchdev network device/hardware in the following way:
+
+- when IGMP snooping is turned off, multicast traffic must be flooded to all
+  ports within the same bridge that have mcast_flood=true. The CPU/management
+  port should ideally not be flooded (unless the ingress interface has
+  IFF_ALLMULTI or IFF_PROMISC) and continue to learn multicast traffic through
+  the network stack notifications. If the hardware is not capable of doing that
+  then the CPU/management port must also be flooded and multicast filtering
+  happens in software.
+
+- when IGMP snooping is turned on, multicast traffic must selectively flow
+  to the appropriate network ports (including CPU/management port). Flooding of
+  unknown multicast should be only towards the ports connected to a multicast
+  router (the local device may also act as a multicast router).
+
+The switch must adhere to RFC 4541 and flood multicast traffic accordingly
+since that is what the Linux bridge implementation does.
+
+Because IGMP snooping can be turned on/off at runtime, the switchdev driver
+must be able to reconfigure the underlying hardware on the fly to honor the
+toggling of that option and behave appropriately.
+
+A switchdev driver can also refuse to support dynamic toggling of the multicast
+snooping knob at runtime and require the destruction of the bridge device(s)
+and creation of a new bridge device(s) with a different multicast snooping
+value.
diff --git a/Documentation/userspace-api/ebpf/index.rst b/Documentation/userspace-api/ebpf/index.rst
new file mode 100644
index 000000000000..473dfba78116
--- /dev/null
+++ b/Documentation/userspace-api/ebpf/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+eBPF Userspace API
+==================
+
+eBPF is a kernel mechanism to provide a sandboxed runtime environment in the
+Linux kernel for runtime extension and instrumentation without changing kernel
+source code or loading kernel modules. eBPF programs can be attached to various
+kernel subsystems, including networking, tracing and Linux security modules
+(LSM).
+
+For internal kernel documentation on eBPF, see Documentation/bpf/index.rst.
+
+.. toctree::
+   :maxdepth: 1
+
+   syscall
diff --git a/Documentation/userspace-api/ebpf/syscall.rst b/Documentation/userspace-api/ebpf/syscall.rst
new file mode 100644
index 000000000000..ea9918084221
--- /dev/null
+++ b/Documentation/userspace-api/ebpf/syscall.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+eBPF Syscall
+------------
+
+:Authors: - Alexei Starovoitov <ast@kernel.org>
+          - Joe Stringer <joe@wand.net.nz>
+          - Michael Kerrisk <mtk.manpages@gmail.com>
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_.
+
+bpf() subcommand reference
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/uapi/linux/bpf.h
+   :doc: eBPF Syscall Preamble
+
+.. kernel-doc:: include/uapi/linux/bpf.h
+   :doc: eBPF Syscall Commands
+
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index d29b020e5622..1e2438b7afa0 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -21,6 +21,7 @@ place where this information is gathered.
    unshare
    spec_ctrl
    accelerators/ocxl
+   ebpf/index
    ioctl/index
    iommu
    media/index
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d23b0ec0c90..11177892c373 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1530,6 +1530,7 @@ F:	Documentation/devicetree/bindings/dma/owl-dma.yaml
 F:	Documentation/devicetree/bindings/i2c/i2c-owl.yaml
 F:	Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml
 F:	Documentation/devicetree/bindings/mmc/owl-mmc.yaml
+F:	Documentation/devicetree/bindings/net/actions,owl-emac.yaml
 F:	Documentation/devicetree/bindings/pinctrl/actions,*
 F:	Documentation/devicetree/bindings/power/actions,owl-sps.txt
 F:	Documentation/devicetree/bindings/timer/actions,owl-timer.txt
@@ -1542,6 +1543,7 @@ F:	drivers/dma/owl-dma.c
 F:	drivers/i2c/busses/i2c-owl.c
 F:	drivers/irqchip/irq-owl-sirq.c
 F:	drivers/mmc/host/owl-mmc.c
+F:	drivers/net/ethernet/actions/
 F:	drivers/pinctrl/actions/*
 F:	drivers/soc/actions/
 F:	include/dt-bindings/power/owl-*
@@ -3233,6 +3235,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 F:	Documentation/bpf/
 F:	Documentation/networking/filter.rst
+F:	Documentation/userspace-api/ebpf/
 F:	arch/*/net/*
 F:	include/linux/bpf*
 F:	include/linux/filter.h
@@ -3247,6 +3250,7 @@ F:	net/core/filter.c
 F:	net/sched/act_bpf.c
 F:	net/sched/cls_bpf.c
 F:	samples/bpf/
+F:	scripts/bpf_doc.py
 F:	tools/bpf/
 F:	tools/lib/bpf/
 F:	tools/testing/selftests/bpf/
@@ -5471,11 +5475,11 @@ F:	drivers/net/ethernet/freescale/dpaa2/dpmac*
 F:	drivers/net/ethernet/freescale/dpaa2/dpni*
 
 DPAA2 ETHERNET SWITCH DRIVER
-M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
 M:	Ioana Ciornei <ioana.ciornei@nxp.com>
-L:	linux-kernel@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Maintained
-F:	drivers/staging/fsl-dpaa2/ethsw
+F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
+F:	drivers/net/ethernet/freescale/dpaa2/dpsw*
 
 DPT_I2O SCSI RAID DRIVER
 M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 495fd0a1f040..b508df2ecada 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/atmdev.h>
 #include <linux/sonet.h>
-#include <linux/atm_suni.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
 #include <linux/firmware.h>
diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c
index c920a8c52925..21e5acc766b8 100644
--- a/drivers/atm/suni.c
+++ b/drivers/atm/suni.c
@@ -21,7 +21,6 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/capability.h>
-#include <linux/atm_suni.h>
 #include <linux/slab.h>
 #include <asm/param.h>
 #include <linux/uaccess.h>
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 7013a3f08429..14092152b786 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -2748,8 +2748,6 @@ hfcmulti_interrupt(int intno, void *dev_id)
 		if (hc->ctype != HFC_TYPE_E1)
 			ph_state_irq(hc, r_irq_statech);
 	}
-	if (status & V_EXT_IRQSTA)
-		; /* external IRQ */
 	if (status & V_LOST_STA) {
 		/* LOST IRQ */
 		HFC_outb(hc, R_INC_RES_FIFO, V_RES_LOST); /* clear irq! */
diff --git a/drivers/isdn/hardware/mISDN/iohelper.h b/drivers/isdn/hardware/mISDN/iohelper.h
index b2b2bde8edba..c81f7aba4b57 100644
--- a/drivers/isdn/hardware/mISDN/iohelper.h
+++ b/drivers/isdn/hardware/mISDN/iohelper.h
@@ -13,14 +13,14 @@
 #ifndef _IOHELPER_H
 #define _IOHELPER_H
 
-typedef	u8	(read_reg_func)(void *hwp, u8 offset);
-			       typedef	void	(write_reg_func)(void *hwp, u8 offset, u8 value);
-			       typedef	void	(fifo_func)(void *hwp, u8 offset, u8 *datap, int size);
+typedef u8 (read_reg_func)(void *hwp, u8 offset);
+typedef void (write_reg_func)(void *hwp, u8 offset, u8 value);
+typedef void (fifo_func)(void *hwp, u8 offset, u8 *datap, int size);
 
-			       struct _ioport {
-				       u32	port;
-				       u32	ale;
-			       };
+struct _ioport {
+	u32 port;
+	u32 ale;
+};
 
 #define IOFUNC_IO(name, hws, ap)					\
 	static u8 Read##name##_IO(void *p, u8 off) {			\
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 038e72a84b33..4946ea14bf74 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -953,7 +953,6 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 {
 	struct dsp		*dsp = container_of(ch, struct dsp, ch);
 	u_long		flags;
-	int		err = 0;
 
 	if (debug & DEBUG_DSP_CTRL)
 		printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd);
@@ -998,7 +997,7 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 		module_put(THIS_MODULE);
 		break;
 	}
-	return err;
+	return 0;
 }
 
 static void
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index b57dcb834594..facbd886ee1c 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -200,7 +200,7 @@
 
  The complete socket opening and closing is done by a thread.
  When the thread opened a socket, the hc->socket descriptor is set. Whenever a
- packet shall be sent to the socket, the hc->socket must be checked wheter not
+ packet shall be sent to the socket, the hc->socket must be checked whether not
  NULL. To prevent change in socket descriptor, the hc->socket_lock must be used.
  To change the socket, a recall of l1oip_socket_open() will safely kill the
  socket process and create a new one.
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index bcd31f458d1a..5895905b6aa1 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -579,6 +579,7 @@ config NETDEVSIM
 	depends on DEBUG_FS
 	depends on INET
 	depends on IPV6 || IPV6=n
+	depends on PSAMPLE || PSAMPLE=n
 	select NET_DEVLINK
 	help
 	  This driver is a developer testing tool and software model that can
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index f4990ff32fa4..040e20b81317 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_ARCNET) += arcnet/
 obj-$(CONFIG_DEV_APPLETALK) += appletalk/
 obj-$(CONFIG_CAIF) += caif/
 obj-$(CONFIG_CAN) += can/
-obj-y += dsa/
+obj-$(CONFIG_NET_DSA) += dsa/
 obj-$(CONFIG_ETHERNET) += ethernet/
 obj-$(CONFIG_FDDI) += fddi/
 obj-$(CONFIG_HIPPI) += hippi/
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 77d7c38bd435..c9d3604ae129 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -640,6 +640,15 @@ static void bond_opt_error_interpret(struct bonding *bond,
 		netdev_err(bond->dev, "option %s: unable to set because the bond device is up\n",
 			   opt->name);
 		break;
+	case -ENODEV:
+		if (val && val->string) {
+			p = strchr(val->string, '\n');
+			if (p)
+				*p = '\0';
+			netdev_err(bond->dev, "option %s: interface %s does not exist!\n",
+				   opt->name, val->string);
+		}
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 3af373e90806..a5f1aa911fe2 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "Distributed Switch Architecture drivers"
-	depends on HAVE_NET_DSA
+	depends on NET_DSA
 
 source "drivers/net/dsa/b53/Kconfig"
 
 config NET_DSA_BCM_SF2
 	tristate "Broadcom Starfighter 2 Ethernet switch support"
-	depends on HAS_IOMEM && NET_DSA
+	depends on HAS_IOMEM
 	select NET_DSA_TAG_BRCM
 	select FIXED_PHY
 	select BCM7XXX_PHY
@@ -18,7 +18,6 @@ config NET_DSA_BCM_SF2
 
 config NET_DSA_LOOP
 	tristate "DSA mock-up Ethernet switch chip support"
-	depends on NET_DSA
 	select FIXED_PHY
 	help
 	  This enables support for a fake mock-up switch chip which
@@ -28,7 +27,7 @@ source "drivers/net/dsa/hirschmann/Kconfig"
 
 config NET_DSA_LANTIQ_GSWIP
 	tristate "Lantiq / Intel GSWIP"
-	depends on HAS_IOMEM && NET_DSA
+	depends on HAS_IOMEM
 	select NET_DSA_TAG_GSWIP
 	help
 	  This enables support for the Lantiq / Intel GSWIP 2.1 found in
@@ -36,7 +35,6 @@ config NET_DSA_LANTIQ_GSWIP
 
 config NET_DSA_MT7530
 	tristate "MediaTek MT753x and MT7621 Ethernet switch support"
-	depends on NET_DSA
 	select NET_DSA_TAG_MTK
 	help
 	  This enables support for the MediaTek MT7530, MT7531, and MT7621
@@ -44,7 +42,6 @@ config NET_DSA_MT7530
 
 config NET_DSA_MV88E6060
 	tristate "Marvell 88E6060 ethernet switch chip support"
-	depends on NET_DSA
 	select NET_DSA_TAG_TRAILER
 	help
 	  This enables support for the Marvell 88E6060 ethernet switch
@@ -64,7 +61,6 @@ source "drivers/net/dsa/xrs700x/Kconfig"
 
 config NET_DSA_QCA8K
 	tristate "Qualcomm Atheros QCA8K Ethernet switch family support"
-	depends on NET_DSA
 	select NET_DSA_TAG_QCA
 	select REGMAP
 	help
@@ -73,7 +69,6 @@ config NET_DSA_QCA8K
 
 config NET_DSA_REALTEK_SMI
 	tristate "Realtek SMI Ethernet switch family support"
-	depends on NET_DSA
 	select NET_DSA_TAG_RTL4_A
 	select FIXED_PHY
 	select IRQ_DOMAIN
@@ -93,7 +88,7 @@ config NET_DSA_SMSC_LAN9303
 
 config NET_DSA_SMSC_LAN9303_I2C
 	tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in I2C managed mode"
-	depends on NET_DSA && I2C
+	depends on I2C
 	select NET_DSA_SMSC_LAN9303
 	select REGMAP_I2C
 	help
@@ -102,7 +97,6 @@ config NET_DSA_SMSC_LAN9303_I2C
 
 config NET_DSA_SMSC_LAN9303_MDIO
 	tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in MDIO managed mode"
-	depends on NET_DSA
 	select NET_DSA_SMSC_LAN9303
 	help
 	  Enable access functions if the SMSC/Microchip LAN9303 is configured
@@ -110,7 +104,6 @@ config NET_DSA_SMSC_LAN9303_MDIO
 
 config NET_DSA_VITESSE_VSC73XX
 	tristate
-	depends on NET_DSA
 	select FIXED_PHY
 	select VITESSE_PHY
 	select GPIOLIB
@@ -120,7 +113,6 @@ config NET_DSA_VITESSE_VSC73XX
 
 config NET_DSA_VITESSE_VSC73XX_SPI
 	tristate "Vitesse VSC7385/7388/7395/7398 SPI mode support"
-	depends on NET_DSA
 	depends on SPI
 	select NET_DSA_VITESSE_VSC73XX
 	help
@@ -129,7 +121,6 @@ config NET_DSA_VITESSE_VSC73XX_SPI
 
 config NET_DSA_VITESSE_VSC73XX_PLATFORM
 	tristate "Vitesse VSC7385/7388/7395/7398 Platform mode support"
-	depends on NET_DSA
 	depends on HAS_IOMEM
 	select NET_DSA_VITESSE_VSC73XX
 	help
diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig
index f9891a81c808..90b525160b71 100644
--- a/drivers/net/dsa/b53/Kconfig
+++ b/drivers/net/dsa/b53/Kconfig
@@ -3,6 +3,7 @@ menuconfig B53
 	tristate "Broadcom BCM53xx managed switch support"
 	depends on NET_DSA
 	select NET_DSA_TAG_BRCM
+	select NET_DSA_TAG_BRCM_LEGACY
 	select NET_DSA_TAG_BRCM_PREPEND
 	help
 	  This driver adds support for Broadcom managed switch chips. It supports
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index eb443721c58e..3ca6b394dd5f 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -349,7 +349,7 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 	b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt);
 }
 
-static void b53_enable_vlan(struct b53_device *dev, bool enable,
+static void b53_enable_vlan(struct b53_device *dev, int port, bool enable,
 			    bool enable_filtering)
 {
 	u8 mgmt, vc0, vc1, vc4 = 0, vc5;
@@ -431,6 +431,9 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable,
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
 
 	dev->vlan_enabled = enable;
+
+	dev_dbg(dev->dev, "Port %d VLAN enabled: %d, filtering: %d\n",
+		port, enable, enable_filtering);
 }
 
 static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100)
@@ -743,7 +746,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
 		b53_do_vlan_op(dev, VTA_CMD_CLEAR);
 	}
 
-	b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering);
+	b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering);
 
 	b53_for_each_port(dev, i)
 		b53_write16(dev, B53_VLAN_PAGE,
@@ -1422,7 +1425,7 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
 {
 	struct b53_device *dev = ds->priv;
 
-	b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering);
+	b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering);
 
 	return 0;
 }
@@ -1447,7 +1450,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	if (vlan->vid >= dev->num_vlans)
 		return -ERANGE;
 
-	b53_enable_vlan(dev, true, ds->vlan_filtering);
+	b53_enable_vlan(dev, port, true, ds->vlan_filtering);
 
 	return 0;
 }
@@ -2045,15 +2048,17 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port,
 {
 	struct b53_device *dev = ds->priv;
 
-	/* Older models (5325, 5365) support a different tag format that we do
-	 * not support in net/dsa/tag_brcm.c yet.
-	 */
-	if (is5325(dev) || is5365(dev) ||
-	    !b53_can_enable_brcm_tags(ds, port, mprot)) {
+	if (!b53_can_enable_brcm_tags(ds, port, mprot)) {
 		dev->tag_protocol = DSA_TAG_PROTO_NONE;
 		goto out;
 	}
 
+	/* Older models require a different 6 byte tag */
+	if (is5325(dev) || is5365(dev) || is63xx(dev)) {
+		dev->tag_protocol = DSA_TAG_PROTO_BRCM_LEGACY;
+		goto out;
+	}
+
 	/* Broadcom BCM58xx chips have a flow accelerator on Port 8
 	 * which requires us to use the prepended Broadcom tag type
 	 */
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index c628d0980c0b..82680e083cc2 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -16,6 +16,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <linux/bits.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
@@ -228,11 +229,65 @@ static const struct b53_io_ops b53_mmap_ops = {
 	.write64 = b53_mmap_write64,
 };
 
+static int b53_mmap_probe_of(struct platform_device *pdev,
+			     struct b53_platform_data **ppdata)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *of_ports, *of_port;
+	struct device *dev = &pdev->dev;
+	struct b53_platform_data *pdata;
+	void __iomem *mem;
+
+	mem = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	pdata = devm_kzalloc(dev, sizeof(struct b53_platform_data),
+			     GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->regs = mem;
+	pdata->chip_id = BCM63XX_DEVICE_ID;
+	pdata->big_endian = of_property_read_bool(np, "big-endian");
+
+	of_ports = of_get_child_by_name(np, "ports");
+	if (!of_ports) {
+		dev_err(dev, "no ports child node found\n");
+		return -EINVAL;
+	}
+
+	for_each_available_child_of_node(of_ports, of_port) {
+		u32 reg;
+
+		if (of_property_read_u32(of_port, "reg", &reg))
+			continue;
+
+		if (reg < B53_CPU_PORT)
+			pdata->enabled_ports |= BIT(reg);
+	}
+
+	of_node_put(of_ports);
+	*ppdata = pdata;
+
+	return 0;
+}
+
 static int b53_mmap_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
 	struct b53_platform_data *pdata = pdev->dev.platform_data;
 	struct b53_mmap_priv *priv;
 	struct b53_device *dev;
+	int ret;
+
+	if (!pdata && np) {
+		ret = b53_mmap_probe_of(pdev, &pdata);
+		if (ret) {
+			dev_err(&pdev->dev, "OF probe error\n");
+			return ret;
+		}
+	}
 
 	if (!pdata)
 		return -EINVAL;
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 8419bb7f4505..82700a5714c1 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -186,11 +186,7 @@ static inline int is531x5(struct b53_device *dev)
 
 static inline int is63xx(struct b53_device *dev)
 {
-#ifdef CONFIG_BCM63XX
 	return dev->chip_id == BCM63XX_DEVICE_ID;
-#else
-	return 0;
-#endif
 }
 
 static inline int is5301x(struct b53_device *dev)
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 7abec8dab8ba..413158275db8 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -324,9 +324,22 @@ static int b53_spi_remove(struct spi_device *spi)
 	return 0;
 }
 
+static const struct of_device_id b53_spi_of_match[] = {
+	{ .compatible = "brcm,bcm5325" },
+	{ .compatible = "brcm,bcm5365" },
+	{ .compatible = "brcm,bcm5395" },
+	{ .compatible = "brcm,bcm5397" },
+	{ .compatible = "brcm,bcm5398" },
+	{ .compatible = "brcm,bcm53115" },
+	{ .compatible = "brcm,bcm53125" },
+	{ .compatible = "brcm,bcm53128" },
+	{ /* sentinel */ }
+};
+
 static struct spi_driver b53_spi_driver = {
 	.driver = {
 		.name	= "b53-switch",
+		.of_match_table = b53_spi_of_match,
 	},
 	.probe	= b53_spi_probe,
 	.remove	= b53_spi_remove,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index ba5d546d06aa..9150038b60cb 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -32,6 +32,36 @@
 #include "b53/b53_priv.h"
 #include "b53/b53_regs.h"
 
+static u16 bcm_sf2_reg_rgmii_cntrl(struct bcm_sf2_priv *priv, int port)
+{
+	switch (priv->type) {
+	case BCM4908_DEVICE_ID:
+		switch (port) {
+		case 7:
+			return REG_RGMII_11_CNTRL;
+		default:
+			break;
+		}
+		break;
+	default:
+		switch (port) {
+		case 0:
+			return REG_RGMII_0_CNTRL;
+		case 1:
+			return REG_RGMII_1_CNTRL;
+		case 2:
+			return REG_RGMII_2_CNTRL;
+		default:
+			break;
+		}
+	}
+
+	WARN_ONCE(1, "Unsupported port %d\n", port);
+
+	/* RO fallback reg */
+	return REG_SWITCH_STATUS;
+}
+
 /* Return the number of active ports, not counting the IMP (CPU) port */
 static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds)
 {
@@ -435,6 +465,44 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
 	return 0;
 }
 
+static void bcm_sf2_crossbar_setup(struct bcm_sf2_priv *priv)
+{
+	struct device *dev = priv->dev->ds->dev;
+	int shift;
+	u32 mask;
+	u32 reg;
+	int i;
+
+	mask = BIT(priv->num_crossbar_int_ports) - 1;
+
+	reg = reg_readl(priv, REG_CROSSBAR);
+	switch (priv->type) {
+	case BCM4908_DEVICE_ID:
+		shift = CROSSBAR_BCM4908_INT_P7 * priv->num_crossbar_int_ports;
+		reg &= ~(mask << shift);
+		if (0) /* FIXME */
+			reg |= CROSSBAR_BCM4908_EXT_SERDES << shift;
+		else if (priv->int_phy_mask & BIT(7))
+			reg |= CROSSBAR_BCM4908_EXT_GPHY4 << shift;
+		else if (phy_interface_mode_is_rgmii(priv->port_sts[7].mode))
+			reg |= CROSSBAR_BCM4908_EXT_RGMII << shift;
+		else if (WARN(1, "Invalid port mode\n"))
+			return;
+		break;
+	default:
+		return;
+	}
+	reg_writel(priv, reg, REG_CROSSBAR);
+
+	reg = reg_readl(priv, REG_CROSSBAR);
+	for (i = 0; i < priv->num_crossbar_int_ports; i++) {
+		shift = i * priv->num_crossbar_int_ports;
+
+		dev_dbg(dev, "crossbar int port #%d - ext port #%d\n", i,
+			(reg >> shift) & mask);
+	}
+}
+
 static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 {
 	intrl2_0_mask_set(priv, 0xffffffff);
@@ -446,10 +514,11 @@ static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
 				   struct device_node *dn)
 {
+	struct device *dev = priv->dev->ds->dev;
+	struct bcm_sf2_port_status *port_st;
 	struct device_node *port;
 	unsigned int port_num;
 	struct property *prop;
-	phy_interface_t mode;
 	int err;
 
 	priv->moca_port = -1;
@@ -458,19 +527,26 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
 		if (of_property_read_u32(port, "reg", &port_num))
 			continue;
 
+		if (port_num >= DSA_MAX_PORTS) {
+			dev_err(dev, "Invalid port number %d\n", port_num);
+			continue;
+		}
+
+		port_st = &priv->port_sts[port_num];
+
 		/* Internal PHYs get assigned a specific 'phy-mode' property
 		 * value: "internal" to help flag them before MDIO probing
 		 * has completed, since they might be turned off at that
 		 * time
 		 */
-		err = of_get_phy_mode(port, &mode);
+		err = of_get_phy_mode(port, &port_st->mode);
 		if (err)
 			continue;
 
-		if (mode == PHY_INTERFACE_MODE_INTERNAL)
+		if (port_st->mode == PHY_INTERFACE_MODE_INTERNAL)
 			priv->int_phy_mask |= 1 << port_num;
 
-		if (mode == PHY_INTERFACE_MODE_MOCA)
+		if (port_st->mode == PHY_INTERFACE_MODE_MOCA)
 			priv->moca_port = port_num;
 
 		if (of_property_read_bool(port, "brcm,use-bcm-hdr"))
@@ -647,6 +723,7 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 id_mode_dis = 0, port_mode;
+	u32 reg_rgmii_ctrl;
 	u32 reg;
 
 	if (port == core_readl(priv, CORE_IMP0_PRT_ID))
@@ -670,10 +747,12 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 		return;
 	}
 
+	reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 	/* Clear id_mode_dis bit, and the existing port mode, let
 	 * RGMII_MODE_EN bet set by mac_link_{up,down}
 	 */
-	reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+	reg = reg_readl(priv, reg_rgmii_ctrl);
 	reg &= ~ID_MODE_DIS;
 	reg &= ~(PORT_MODE_MASK << PORT_MODE_SHIFT);
 
@@ -681,13 +760,14 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 	if (id_mode_dis)
 		reg |= ID_MODE_DIS;
 
-	reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+	reg_writel(priv, reg, reg_rgmii_ctrl);
 }
 
 static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port,
 				    phy_interface_t interface, bool link)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+	u32 reg_rgmii_ctrl;
 	u32 reg;
 
 	if (!phy_interface_mode_is_rgmii(interface) &&
@@ -695,13 +775,15 @@ static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port,
 	    interface != PHY_INTERFACE_MODE_REVMII)
 		return;
 
+	reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 	/* If the link is down, just disable the interface to conserve power */
-	reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+	reg = reg_readl(priv, reg_rgmii_ctrl);
 	if (link)
 		reg |= RGMII_MODE_EN;
 	else
 		reg &= ~RGMII_MODE_EN;
-	reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+	reg_writel(priv, reg, reg_rgmii_ctrl);
 }
 
 static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
@@ -735,11 +817,15 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->dev->ports[port].eee;
-	u32 reg, offset;
 
 	bcm_sf2_sw_mac_link_set(ds, port, interface, true);
 
 	if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
+		u32 reg_rgmii_ctrl;
+		u32 reg, offset;
+
+		reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 		if (priv->type == BCM4908_DEVICE_ID ||
 		    priv->type == BCM7445_DEVICE_ID)
 			offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
@@ -750,7 +836,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 		    interface == PHY_INTERFACE_MODE_RGMII_TXID ||
 		    interface == PHY_INTERFACE_MODE_MII ||
 		    interface == PHY_INTERFACE_MODE_REVMII) {
-			reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+			reg = reg_readl(priv, reg_rgmii_ctrl);
 			reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
 
 			if (tx_pause)
@@ -758,7 +844,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 			if (rx_pause)
 				reg |= RX_PAUSE_EN;
 
-			reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+			reg_writel(priv, reg, reg_rgmii_ctrl);
 		}
 
 		reg = SW_OVERRIDE | LINK_STS;
@@ -861,6 +947,8 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 		return ret;
 	}
 
+	bcm_sf2_crossbar_setup(priv);
+
 	ret = bcm_sf2_cfp_resume(ds);
 	if (ret)
 		return ret;
@@ -1133,6 +1221,7 @@ struct bcm_sf2_of_data {
 	const u16 *reg_offsets;
 	unsigned int core_reg_align;
 	unsigned int num_cfp_rules;
+	unsigned int num_crossbar_int_ports;
 };
 
 static const u16 bcm_sf2_4908_reg_offsets[] = {
@@ -1144,9 +1233,7 @@ static const u16 bcm_sf2_4908_reg_offsets[] = {
 	[REG_PHY_REVISION]	= 0x14,
 	[REG_SPHY_CNTRL]	= 0x24,
 	[REG_CROSSBAR]		= 0xc8,
-	[REG_RGMII_0_CNTRL]	= 0xe0,
-	[REG_RGMII_1_CNTRL]	= 0xec,
-	[REG_RGMII_2_CNTRL]	= 0xf8,
+	[REG_RGMII_11_CNTRL]	= 0x014c,
 	[REG_LED_0_CNTRL]	= 0x40,
 	[REG_LED_1_CNTRL]	= 0x4c,
 	[REG_LED_2_CNTRL]	= 0x58,
@@ -1156,7 +1243,8 @@ static const struct bcm_sf2_of_data bcm_sf2_4908_data = {
 	.type		= BCM4908_DEVICE_ID,
 	.core_reg_align	= 0,
 	.reg_offsets	= bcm_sf2_4908_reg_offsets,
-	.num_cfp_rules	= 0, /* FIXME */
+	.num_cfp_rules	= 256,
+	.num_crossbar_int_ports = 2,
 };
 
 /* Register offsets for the SWITCH_REG_* block */
@@ -1267,6 +1355,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 	priv->reg_offsets = data->reg_offsets;
 	priv->core_reg_align = data->core_reg_align;
 	priv->num_cfp_rules = data->num_cfp_rules;
+	priv->num_crossbar_int_ports = data->num_crossbar_int_ports;
 
 	priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev,
 								"switch");
@@ -1340,6 +1429,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 		goto out_clk_mdiv;
 	}
 
+	bcm_sf2_crossbar_setup(priv);
+
 	bcm_sf2_gphy_enable_set(priv->dev->ds, true);
 
 	ret = bcm_sf2_mdio_register(ds);
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 1ed901a68536..0d48402068d3 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -44,6 +44,7 @@ struct bcm_sf2_hw_params {
 #define BCM_SF2_REGS_NUM	6
 
 struct bcm_sf2_port_status {
+	phy_interface_t mode;
 	unsigned int link;
 	bool enabled;
 };
@@ -73,6 +74,7 @@ struct bcm_sf2_priv {
 	const u16			*reg_offsets;
 	unsigned int			core_reg_align;
 	unsigned int			num_cfp_rules;
+	unsigned int			num_crossbar_int_ports;
 
 	/* spinlock protecting access to the indirect registers */
 	spinlock_t			indir_lock;
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 1d2d55c9f8aa..7bffc80f241f 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -21,6 +21,7 @@ enum bcm_sf2_reg_offs {
 	REG_RGMII_0_CNTRL,
 	REG_RGMII_1_CNTRL,
 	REG_RGMII_2_CNTRL,
+	REG_RGMII_11_CNTRL,
 	REG_LED_0_CNTRL,
 	REG_LED_1_CNTRL,
 	REG_LED_2_CNTRL,
@@ -48,7 +49,12 @@ enum bcm_sf2_reg_offs {
 #define  PHY_PHYAD_SHIFT		8
 #define  PHY_PHYAD_MASK			0x1F
 
-#define REG_RGMII_CNTRL_P(x)		(REG_RGMII_0_CNTRL + (x))
+/* Relative to REG_CROSSBAR */
+#define CROSSBAR_BCM4908_INT_P7		0
+#define CROSSBAR_BCM4908_INT_RUNNER	1
+#define CROSSBAR_BCM4908_EXT_SERDES	0
+#define CROSSBAR_BCM4908_EXT_GPHY4	1
+#define CROSSBAR_BCM4908_EXT_RGMII	2
 
 /* Relative to REG_RGMII_CNTRL */
 #define  RGMII_MODE_EN			(1 << 0)
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 463137c39db2..918be7eb626f 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -433,7 +433,7 @@ static void hellcreek_unapply_vlan(struct hellcreek *hellcreek, int port,
 
 	mutex_lock(&hellcreek->reg_lock);
 
-	hellcreek_select_vlan(hellcreek, vid, 0);
+	hellcreek_select_vlan(hellcreek, vid, false);
 
 	/* Setup port vlan membership */
 	hellcreek_select_vlan_params(hellcreek, port, &shift, &mask);
@@ -596,6 +596,83 @@ static void hellcreek_setup_vlan_membership(struct dsa_switch *ds, int port,
 		hellcreek_unapply_vlan(hellcreek, upstream, vid);
 }
 
+static void hellcreek_port_set_ucast_flood(struct hellcreek *hellcreek,
+					   int port, bool enable)
+{
+	struct hellcreek_port *hellcreek_port;
+	u16 val;
+
+	hellcreek_port = &hellcreek->ports[port];
+
+	dev_dbg(hellcreek->dev, "%s unicast flooding on port %d\n",
+		enable ? "Enable" : "Disable", port);
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	hellcreek_select_port(hellcreek, port);
+	val = hellcreek_port->ptcfg;
+	if (enable)
+		val &= ~HR_PTCFG_UUC_FLT;
+	else
+		val |= HR_PTCFG_UUC_FLT;
+	hellcreek_write(hellcreek, val, HR_PTCFG);
+	hellcreek_port->ptcfg = val;
+
+	mutex_unlock(&hellcreek->reg_lock);
+}
+
+static void hellcreek_port_set_mcast_flood(struct hellcreek *hellcreek,
+					   int port, bool enable)
+{
+	struct hellcreek_port *hellcreek_port;
+	u16 val;
+
+	hellcreek_port = &hellcreek->ports[port];
+
+	dev_dbg(hellcreek->dev, "%s multicast flooding on port %d\n",
+		enable ? "Enable" : "Disable", port);
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	hellcreek_select_port(hellcreek, port);
+	val = hellcreek_port->ptcfg;
+	if (enable)
+		val &= ~HR_PTCFG_UMC_FLT;
+	else
+		val |= HR_PTCFG_UMC_FLT;
+	hellcreek_write(hellcreek, val, HR_PTCFG);
+	hellcreek_port->ptcfg = val;
+
+	mutex_unlock(&hellcreek->reg_lock);
+}
+
+static int hellcreek_pre_bridge_flags(struct dsa_switch *ds, int port,
+				      struct switchdev_brport_flags flags,
+				      struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int hellcreek_bridge_flags(struct dsa_switch *ds, int port,
+				  struct switchdev_brport_flags flags,
+				  struct netlink_ext_ack *extack)
+{
+	struct hellcreek *hellcreek = ds->priv;
+
+	if (flags.mask & BR_FLOOD)
+		hellcreek_port_set_ucast_flood(hellcreek, port,
+					       !!(flags.val & BR_FLOOD));
+
+	if (flags.mask & BR_MCAST_FLOOD)
+		hellcreek_port_set_mcast_flood(hellcreek, port,
+					       !!(flags.val & BR_MCAST_FLOOD));
+
+	return 0;
+}
+
 static int hellcreek_port_bridge_join(struct dsa_switch *ds, int port,
 				      struct net_device *br)
 {
@@ -670,6 +747,40 @@ static int __hellcreek_fdb_del(struct hellcreek *hellcreek,
 	return hellcreek_wait_fdb_ready(hellcreek);
 }
 
+static void hellcreek_populate_fdb_entry(struct hellcreek *hellcreek,
+					 struct hellcreek_fdb_entry *entry,
+					 size_t idx)
+{
+	unsigned char addr[ETH_ALEN];
+	u16 meta, mac;
+
+	/* Read values */
+	meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
+	mac	= hellcreek_read(hellcreek, HR_FDBRDL);
+	addr[5] = mac & 0xff;
+	addr[4] = (mac & 0xff00) >> 8;
+	mac	= hellcreek_read(hellcreek, HR_FDBRDM);
+	addr[3] = mac & 0xff;
+	addr[2] = (mac & 0xff00) >> 8;
+	mac	= hellcreek_read(hellcreek, HR_FDBRDH);
+	addr[1] = mac & 0xff;
+	addr[0] = (mac & 0xff00) >> 8;
+
+	/* Populate @entry */
+	memcpy(entry->mac, addr, sizeof(addr));
+	entry->idx	    = idx;
+	entry->portmask	    = (meta & HR_FDBMDRD_PORTMASK_MASK) >>
+		HR_FDBMDRD_PORTMASK_SHIFT;
+	entry->age	    = (meta & HR_FDBMDRD_AGE_MASK) >>
+		HR_FDBMDRD_AGE_SHIFT;
+	entry->is_obt	    = !!(meta & HR_FDBMDRD_OBT);
+	entry->pass_blocked = !!(meta & HR_FDBMDRD_PASS_BLOCKED);
+	entry->is_static    = !!(meta & HR_FDBMDRD_STATIC);
+	entry->reprio_tc    = (meta & HR_FDBMDRD_REPRIO_TC_MASK) >>
+		HR_FDBMDRD_REPRIO_TC_SHIFT;
+	entry->reprio_en    = !!(meta & HR_FDBMDRD_REPRIO_EN);
+}
+
 /* Retrieve the index of a FDB entry by mac address. Currently we search through
  * the complete table in hardware. If that's too slow, we might have to cache
  * the complete FDB table in software.
@@ -691,39 +802,19 @@ static int hellcreek_fdb_get(struct hellcreek *hellcreek,
 	 * enter new entries anywhere.
 	 */
 	for (i = 0; i < hellcreek->fdb_entries; ++i) {
-		unsigned char addr[ETH_ALEN];
-		u16 meta, mac;
-
-		meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
-		mac	= hellcreek_read(hellcreek, HR_FDBRDL);
-		addr[5] = mac & 0xff;
-		addr[4] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDM);
-		addr[3] = mac & 0xff;
-		addr[2] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDH);
-		addr[1] = mac & 0xff;
-		addr[0] = (mac & 0xff00) >> 8;
+		struct hellcreek_fdb_entry tmp = { 0 };
+
+		/* Read entry */
+		hellcreek_populate_fdb_entry(hellcreek, &tmp, i);
 
 		/* Force next entry */
 		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
 
-		if (memcmp(addr, dest, ETH_ALEN))
+		if (memcmp(tmp.mac, dest, ETH_ALEN))
 			continue;
 
 		/* Match found */
-		entry->idx	    = i;
-		entry->portmask	    = (meta & HR_FDBMDRD_PORTMASK_MASK) >>
-			HR_FDBMDRD_PORTMASK_SHIFT;
-		entry->age	    = (meta & HR_FDBMDRD_AGE_MASK) >>
-			HR_FDBMDRD_AGE_SHIFT;
-		entry->is_obt	    = !!(meta & HR_FDBMDRD_OBT);
-		entry->pass_blocked = !!(meta & HR_FDBMDRD_PASS_BLOCKED);
-		entry->is_static    = !!(meta & HR_FDBMDRD_STATIC);
-		entry->reprio_tc    = (meta & HR_FDBMDRD_REPRIO_TC_MASK) >>
-			HR_FDBMDRD_REPRIO_TC_SHIFT;
-		entry->reprio_en    = !!(meta & HR_FDBMDRD_REPRIO_EN);
-		memcpy(entry->mac, addr, sizeof(addr));
+		memcpy(entry, &tmp, sizeof(*entry));
 
 		return 0;
 	}
@@ -838,18 +929,9 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 	for (i = 0; i < hellcreek->fdb_entries; ++i) {
 		unsigned char null_addr[ETH_ALEN] = { 0 };
 		struct hellcreek_fdb_entry entry = { 0 };
-		u16 meta, mac;
-
-		meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
-		mac	= hellcreek_read(hellcreek, HR_FDBRDL);
-		entry.mac[5] = mac & 0xff;
-		entry.mac[4] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDM);
-		entry.mac[3] = mac & 0xff;
-		entry.mac[2] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDH);
-		entry.mac[1] = mac & 0xff;
-		entry.mac[0] = (mac & 0xff00) >> 8;
+
+		/* Read entry */
+		hellcreek_populate_fdb_entry(hellcreek, &entry, i);
 
 		/* Force next entry */
 		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
@@ -858,10 +940,6 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 		if (!memcmp(entry.mac, null_addr, ETH_ALEN))
 			continue;
 
-		entry.portmask	= (meta & HR_FDBMDRD_PORTMASK_MASK) >>
-			HR_FDBMDRD_PORTMASK_SHIFT;
-		entry.is_static	= !!(meta & HR_FDBMDRD_STATIC);
-
 		/* Check port mask */
 		if (!(entry.portmask & BIT(port)))
 			continue;
@@ -1004,6 +1082,22 @@ out:
 	return ret;
 }
 
+static int hellcreek_devlink_info_get(struct dsa_switch *ds,
+				      struct devlink_info_req *req,
+				      struct netlink_ext_ack *extack)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	int ret;
+
+	ret = devlink_info_driver_name_put(req, "hellcreek");
+	if (ret)
+		return ret;
+
+	return devlink_info_version_fixed_put(req,
+					      DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+					      hellcreek->pdata->name);
+}
+
 static u64 hellcreek_devlink_vlan_table_get(void *priv)
 {
 	struct hellcreek *hellcreek = priv;
@@ -1082,6 +1176,129 @@ out:
 	return err;
 }
 
+static int hellcreek_devlink_region_vlan_snapshot(struct devlink *dl,
+						  const struct devlink_region_ops *ops,
+						  struct netlink_ext_ack *extack,
+						  u8 **data)
+{
+	struct hellcreek_devlink_vlan_entry *table, *entry;
+	struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+	struct hellcreek *hellcreek = ds->priv;
+	int i;
+
+	table = kcalloc(VLAN_N_VID, sizeof(*entry), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	entry = table;
+
+	mutex_lock(&hellcreek->reg_lock);
+	for (i = 0; i < VLAN_N_VID; ++i, ++entry) {
+		entry->member = hellcreek->vidmbrcfg[i];
+		entry->vid    = i;
+	}
+	mutex_unlock(&hellcreek->reg_lock);
+
+	*data = (u8 *)table;
+
+	return 0;
+}
+
+static int hellcreek_devlink_region_fdb_snapshot(struct devlink *dl,
+						 const struct devlink_region_ops *ops,
+						 struct netlink_ext_ack *extack,
+						 u8 **data)
+{
+	struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+	struct hellcreek_fdb_entry *table, *entry;
+	struct hellcreek *hellcreek = ds->priv;
+	size_t i;
+
+	table = kcalloc(hellcreek->fdb_entries, sizeof(*entry), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	entry = table;
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	/* Start table read */
+	hellcreek_read(hellcreek, HR_FDBMAX);
+	hellcreek_write(hellcreek, 0x00, HR_FDBMAX);
+
+	for (i = 0; i < hellcreek->fdb_entries; ++i, ++entry) {
+		/* Read current entry */
+		hellcreek_populate_fdb_entry(hellcreek, entry, i);
+
+		/* Advance read pointer */
+		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
+	}
+
+	mutex_unlock(&hellcreek->reg_lock);
+
+	*data = (u8 *)table;
+
+	return 0;
+}
+
+static struct devlink_region_ops hellcreek_region_vlan_ops = {
+	.name	    = "vlan",
+	.snapshot   = hellcreek_devlink_region_vlan_snapshot,
+	.destructor = kfree,
+};
+
+static struct devlink_region_ops hellcreek_region_fdb_ops = {
+	.name	    = "fdb",
+	.snapshot   = hellcreek_devlink_region_fdb_snapshot,
+	.destructor = kfree,
+};
+
+static int hellcreek_setup_devlink_regions(struct dsa_switch *ds)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	struct devlink_region_ops *ops;
+	struct devlink_region *region;
+	u64 size;
+	int ret;
+
+	/* VLAN table */
+	size = VLAN_N_VID * sizeof(struct hellcreek_devlink_vlan_entry);
+	ops  = &hellcreek_region_vlan_ops;
+
+	region = dsa_devlink_region_create(ds, ops, 1, size);
+	if (IS_ERR(region))
+		return PTR_ERR(region);
+
+	hellcreek->vlan_region = region;
+
+	/* FDB table */
+	size = hellcreek->fdb_entries * sizeof(struct hellcreek_fdb_entry);
+	ops  = &hellcreek_region_fdb_ops;
+
+	region = dsa_devlink_region_create(ds, ops, 1, size);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		goto err_fdb;
+	}
+
+	hellcreek->fdb_region = region;
+
+	return 0;
+
+err_fdb:
+	dsa_devlink_region_destroy(hellcreek->vlan_region);
+
+	return ret;
+}
+
+static void hellcreek_teardown_devlink_regions(struct dsa_switch *ds)
+{
+	struct hellcreek *hellcreek = ds->priv;
+
+	dsa_devlink_region_destroy(hellcreek->fdb_region);
+	dsa_devlink_region_destroy(hellcreek->vlan_region);
+}
+
 static int hellcreek_setup(struct dsa_switch *ds)
 {
 	struct hellcreek *hellcreek = ds->priv;
@@ -1143,11 +1360,24 @@ static int hellcreek_setup(struct dsa_switch *ds)
 		return ret;
 	}
 
+	ret = hellcreek_setup_devlink_regions(ds);
+	if (ret) {
+		dev_err(hellcreek->dev,
+			"Failed to setup devlink regions!\n");
+		goto err_regions;
+	}
+
 	return 0;
+
+err_regions:
+	dsa_devlink_resources_unregister(ds);
+
+	return ret;
 }
 
 static void hellcreek_teardown(struct dsa_switch *ds)
 {
+	hellcreek_teardown_devlink_regions(ds);
 	dsa_devlink_resources_unregister(ds);
 }
 
@@ -1518,31 +1748,34 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
 }
 
 static const struct dsa_switch_ops hellcreek_ds_ops = {
-	.get_ethtool_stats   = hellcreek_get_ethtool_stats,
-	.get_sset_count	     = hellcreek_get_sset_count,
-	.get_strings	     = hellcreek_get_strings,
-	.get_tag_protocol    = hellcreek_get_tag_protocol,
-	.get_ts_info	     = hellcreek_get_ts_info,
-	.phylink_validate    = hellcreek_phylink_validate,
-	.port_bridge_join    = hellcreek_port_bridge_join,
-	.port_bridge_leave   = hellcreek_port_bridge_leave,
-	.port_disable	     = hellcreek_port_disable,
-	.port_enable	     = hellcreek_port_enable,
-	.port_fdb_add	     = hellcreek_fdb_add,
-	.port_fdb_del	     = hellcreek_fdb_del,
-	.port_fdb_dump	     = hellcreek_fdb_dump,
-	.port_hwtstamp_set   = hellcreek_port_hwtstamp_set,
-	.port_hwtstamp_get   = hellcreek_port_hwtstamp_get,
-	.port_prechangeupper = hellcreek_port_prechangeupper,
-	.port_rxtstamp	     = hellcreek_port_rxtstamp,
-	.port_setup_tc	     = hellcreek_port_setup_tc,
-	.port_stp_state_set  = hellcreek_port_stp_state_set,
-	.port_txtstamp	     = hellcreek_port_txtstamp,
-	.port_vlan_add	     = hellcreek_vlan_add,
-	.port_vlan_del	     = hellcreek_vlan_del,
-	.port_vlan_filtering = hellcreek_vlan_filtering,
-	.setup		     = hellcreek_setup,
-	.teardown	     = hellcreek_teardown,
+	.devlink_info_get      = hellcreek_devlink_info_get,
+	.get_ethtool_stats     = hellcreek_get_ethtool_stats,
+	.get_sset_count	       = hellcreek_get_sset_count,
+	.get_strings	       = hellcreek_get_strings,
+	.get_tag_protocol      = hellcreek_get_tag_protocol,
+	.get_ts_info	       = hellcreek_get_ts_info,
+	.phylink_validate      = hellcreek_phylink_validate,
+	.port_bridge_flags     = hellcreek_bridge_flags,
+	.port_bridge_join      = hellcreek_port_bridge_join,
+	.port_bridge_leave     = hellcreek_port_bridge_leave,
+	.port_disable	       = hellcreek_port_disable,
+	.port_enable	       = hellcreek_port_enable,
+	.port_fdb_add	       = hellcreek_fdb_add,
+	.port_fdb_del	       = hellcreek_fdb_del,
+	.port_fdb_dump	       = hellcreek_fdb_dump,
+	.port_hwtstamp_set     = hellcreek_port_hwtstamp_set,
+	.port_hwtstamp_get     = hellcreek_port_hwtstamp_get,
+	.port_pre_bridge_flags = hellcreek_pre_bridge_flags,
+	.port_prechangeupper   = hellcreek_port_prechangeupper,
+	.port_rxtstamp	       = hellcreek_port_rxtstamp,
+	.port_setup_tc	       = hellcreek_port_setup_tc,
+	.port_stp_state_set    = hellcreek_port_stp_state_set,
+	.port_txtstamp	       = hellcreek_port_txtstamp,
+	.port_vlan_add	       = hellcreek_vlan_add,
+	.port_vlan_del	       = hellcreek_vlan_del,
+	.port_vlan_filtering   = hellcreek_vlan_filtering,
+	.setup		       = hellcreek_setup,
+	.teardown	       = hellcreek_teardown,
 };
 
 static int hellcreek_probe(struct platform_device *pdev)
@@ -1693,6 +1926,7 @@ static int hellcreek_remove(struct platform_device *pdev)
 }
 
 static const struct hellcreek_platform_data de1soc_r1_pdata = {
+	.name		 = "r4c30",
 	.num_ports	 = 4,
 	.is_100_mbits	 = 1,
 	.qbv_support	 = 1,
diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h
index 305e76dab34d..9e303b8ab13c 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.h
+++ b/drivers/net/dsa/hirschmann/hellcreek.h
@@ -278,6 +278,8 @@ struct hellcreek {
 	struct mutex reg_lock;	/* Switch IP register lock */
 	struct mutex vlan_lock;	/* VLAN bitmaps lock */
 	struct mutex ptp_lock;	/* PTP IP register lock */
+	struct devlink_region *vlan_region;
+	struct devlink_region *fdb_region;
 	void __iomem *base;
 	void __iomem *ptp_base;
 	u16 swcfg;		/* swcfg shadow */
@@ -304,4 +306,9 @@ enum hellcreek_devlink_resource_id {
 	HELLCREEK_DEVLINK_PARAM_ID_FDB_TABLE,
 };
 
+struct hellcreek_devlink_vlan_entry {
+	u16 vid;
+	u16 member;
+};
+
 #endif /* _HELLCREEK_H_ */
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 52e865a3912c..26d0e3bb5dea 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Lantiq / Intel GSWIP switch driver for VRX200 SoCs
+ * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs
  *
  * Copyright (C) 2010 Lantiq Deutschland
  * Copyright (C) 2012 John Crispin <john@phrozen.org>
@@ -100,6 +100,7 @@
 #define  GSWIP_MII_CFG_MODE_RMIIP	0x2
 #define  GSWIP_MII_CFG_MODE_RMIIM	0x3
 #define  GSWIP_MII_CFG_MODE_RGMII	0x4
+#define  GSWIP_MII_CFG_MODE_GMII	0x9
 #define  GSWIP_MII_CFG_MODE_MASK	0xf
 #define  GSWIP_MII_CFG_RATE_M2P5	0x00
 #define  GSWIP_MII_CFG_RATE_M25	0x10
@@ -220,6 +221,7 @@
 struct gswip_hw_info {
 	int max_ports;
 	int cpu_port;
+	const struct dsa_switch_ops *ops;
 };
 
 struct xway_gphy_match_data {
@@ -1384,12 +1386,42 @@ static int gswip_port_fdb_dump(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static void gswip_phylink_validate(struct dsa_switch *ds, int port,
-				   unsigned long *supported,
-				   struct phylink_link_state *state)
+static void gswip_phylink_set_capab(unsigned long *supported,
+				    struct phylink_link_state *state)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
+	/* Allow all the expected bits */
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	/* With the exclusion of MII, Reverse MII and Reduced MII, we
+	 * support Gigabit, including Half duplex
+	 */
+	if (state->interface != PHY_INTERFACE_MODE_MII &&
+	    state->interface != PHY_INTERFACE_MODE_REVMII &&
+	    state->interface != PHY_INTERFACE_MODE_RMII) {
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseT_Half);
+	}
+
+	phylink_set(mask, 10baseT_Half);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Half);
+	phylink_set(mask, 100baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void gswip_xrx200_phylink_validate(struct dsa_switch *ds, int port,
+					  unsigned long *supported,
+					  struct phylink_link_state *state)
+{
 	switch (port) {
 	case 0:
 	case 1:
@@ -1416,38 +1448,54 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port,
 		return;
 	}
 
-	/* Allow all the expected bits */
-	phylink_set(mask, Autoneg);
-	phylink_set_port_modes(mask);
-	phylink_set(mask, Pause);
-	phylink_set(mask, Asym_Pause);
+	gswip_phylink_set_capab(supported, state);
 
-	/* With the exclusion of MII, Reverse MII and Reduced MII, we
-	 * support Gigabit, including Half duplex
-	 */
-	if (state->interface != PHY_INTERFACE_MODE_MII &&
-	    state->interface != PHY_INTERFACE_MODE_REVMII &&
-	    state->interface != PHY_INTERFACE_MODE_RMII) {
-		phylink_set(mask, 1000baseT_Full);
-		phylink_set(mask, 1000baseT_Half);
+	return;
+
+unsupported:
+	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
+		phy_modes(state->interface), port);
+}
+
+static void gswip_xrx300_phylink_validate(struct dsa_switch *ds, int port,
+					  unsigned long *supported,
+					  struct phylink_link_state *state)
+{
+	switch (port) {
+	case 0:
+		if (!phy_interface_mode_is_rgmii(state->interface) &&
+		    state->interface != PHY_INTERFACE_MODE_GMII &&
+		    state->interface != PHY_INTERFACE_MODE_RMII)
+			goto unsupported;
+		break;
+	case 1:
+	case 2:
+	case 3:
+	case 4:
+		if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
+			goto unsupported;
+		break;
+	case 5:
+		if (!phy_interface_mode_is_rgmii(state->interface) &&
+		    state->interface != PHY_INTERFACE_MODE_INTERNAL &&
+		    state->interface != PHY_INTERFACE_MODE_RMII)
+			goto unsupported;
+		break;
+	default:
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		dev_err(ds->dev, "Unsupported port: %i\n", port);
+		return;
 	}
 
-	phylink_set(mask, 10baseT_Half);
-	phylink_set(mask, 10baseT_Full);
-	phylink_set(mask, 100baseT_Half);
-	phylink_set(mask, 100baseT_Full);
+	gswip_phylink_set_capab(supported, state);
 
-	bitmap_and(supported, supported, mask,
-		   __ETHTOOL_LINK_MODE_MASK_NBITS);
-	bitmap_and(state->advertising, state->advertising, mask,
-		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 	return;
 
 unsupported:
 	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 	dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
 		phy_modes(state->interface), port);
-	return;
 }
 
 static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
@@ -1476,6 +1524,9 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
 	case PHY_INTERFACE_MODE_RGMII_TXID:
 		miicfg |= GSWIP_MII_CFG_MODE_RGMII;
 		break;
+	case PHY_INTERFACE_MODE_GMII:
+		miicfg |= GSWIP_MII_CFG_MODE_GMII;
+		break;
 	default:
 		dev_err(ds->dev,
 			"Unsupported interface: %d\n", state->interface);
@@ -1588,7 +1639,7 @@ static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset)
 	return ARRAY_SIZE(gswip_rmon_cnt);
 }
 
-static const struct dsa_switch_ops gswip_switch_ops = {
+static const struct dsa_switch_ops gswip_xrx200_switch_ops = {
 	.get_tag_protocol	= gswip_get_tag_protocol,
 	.setup			= gswip_setup,
 	.port_enable		= gswip_port_enable,
@@ -1603,7 +1654,31 @@ static const struct dsa_switch_ops gswip_switch_ops = {
 	.port_fdb_add		= gswip_port_fdb_add,
 	.port_fdb_del		= gswip_port_fdb_del,
 	.port_fdb_dump		= gswip_port_fdb_dump,
-	.phylink_validate	= gswip_phylink_validate,
+	.phylink_validate	= gswip_xrx200_phylink_validate,
+	.phylink_mac_config	= gswip_phylink_mac_config,
+	.phylink_mac_link_down	= gswip_phylink_mac_link_down,
+	.phylink_mac_link_up	= gswip_phylink_mac_link_up,
+	.get_strings		= gswip_get_strings,
+	.get_ethtool_stats	= gswip_get_ethtool_stats,
+	.get_sset_count		= gswip_get_sset_count,
+};
+
+static const struct dsa_switch_ops gswip_xrx300_switch_ops = {
+	.get_tag_protocol	= gswip_get_tag_protocol,
+	.setup			= gswip_setup,
+	.port_enable		= gswip_port_enable,
+	.port_disable		= gswip_port_disable,
+	.port_bridge_join	= gswip_port_bridge_join,
+	.port_bridge_leave	= gswip_port_bridge_leave,
+	.port_fast_age		= gswip_port_fast_age,
+	.port_vlan_filtering	= gswip_port_vlan_filtering,
+	.port_vlan_add		= gswip_port_vlan_add,
+	.port_vlan_del		= gswip_port_vlan_del,
+	.port_stp_state_set	= gswip_port_stp_state_set,
+	.port_fdb_add		= gswip_port_fdb_add,
+	.port_fdb_del		= gswip_port_fdb_del,
+	.port_fdb_dump		= gswip_port_fdb_dump,
+	.phylink_validate	= gswip_xrx300_phylink_validate,
 	.phylink_mac_config	= gswip_phylink_mac_config,
 	.phylink_mac_link_down	= gswip_phylink_mac_link_down,
 	.phylink_mac_link_up	= gswip_phylink_mac_link_up,
@@ -1832,7 +1907,7 @@ remove_gphy:
 static int gswip_probe(struct platform_device *pdev)
 {
 	struct gswip_priv *priv;
-	struct device_node *mdio_np, *gphy_fw_np;
+	struct device_node *np, *mdio_np, *gphy_fw_np;
 	struct device *dev = &pdev->dev;
 	int err;
 	int i;
@@ -1865,10 +1940,28 @@ static int gswip_probe(struct platform_device *pdev)
 	priv->ds->dev = dev;
 	priv->ds->num_ports = priv->hw_info->max_ports;
 	priv->ds->priv = priv;
-	priv->ds->ops = &gswip_switch_ops;
+	priv->ds->ops = priv->hw_info->ops;
 	priv->dev = dev;
 	version = gswip_switch_r(priv, GSWIP_VERSION);
 
+	np = dev->of_node;
+	switch (version) {
+	case GSWIP_VERSION_2_0:
+	case GSWIP_VERSION_2_1:
+		if (!of_device_is_compatible(np, "lantiq,xrx200-gswip"))
+			return -EINVAL;
+		break;
+	case GSWIP_VERSION_2_2:
+	case GSWIP_VERSION_2_2_ETC:
+		if (!of_device_is_compatible(np, "lantiq,xrx300-gswip") &&
+		    !of_device_is_compatible(np, "lantiq,xrx330-gswip"))
+			return -EINVAL;
+		break;
+	default:
+		dev_err(dev, "unknown GSWIP version: 0x%x", version);
+		return -ENOENT;
+	}
+
 	/* bring up the mdio bus */
 	gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw");
 	if (gphy_fw_np) {
@@ -1946,10 +2039,19 @@ static int gswip_remove(struct platform_device *pdev)
 static const struct gswip_hw_info gswip_xrx200 = {
 	.max_ports = 7,
 	.cpu_port = 6,
+	.ops = &gswip_xrx200_switch_ops,
+};
+
+static const struct gswip_hw_info gswip_xrx300 = {
+	.max_ports = 7,
+	.cpu_port = 6,
+	.ops = &gswip_xrx300_switch_ops,
 };
 
 static const struct of_device_id gswip_of_match[] = {
 	{ .compatible = "lantiq,xrx200-gswip", .data = &gswip_xrx200 },
+	{ .compatible = "lantiq,xrx300-gswip", .data = &gswip_xrx300 },
+	{ .compatible = "lantiq,xrx330-gswip", .data = &gswip_xrx300 },
 	{},
 };
 MODULE_DEVICE_TABLE(of, gswip_of_match);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 9871d7cff93a..c442a5885fca 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -997,8 +997,9 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
 	mt7530_write(priv, MT7530_PVC_P(port),
 		     PORT_SPEC_TAG);
 
-	/* Unknown multicast frame forwarding to the cpu port */
-	mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port)));
+	/* Disable flooding by default */
+	mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK,
+		   BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
 
 	/* Set CPU port number */
 	if (priv->id == ID_MT7621)
@@ -1136,6 +1137,56 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 }
 
 static int
+mt7530_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+			     struct switchdev_brport_flags flags,
+			     struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+			   BR_BCAST_FLOOD))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
+			 struct switchdev_brport_flags flags,
+			 struct netlink_ext_ack *extack)
+{
+	struct mt7530_priv *priv = ds->priv;
+
+	if (flags.mask & BR_LEARNING)
+		mt7530_rmw(priv, MT7530_PSC_P(port), SA_DIS,
+			   flags.val & BR_LEARNING ? 0 : SA_DIS);
+
+	if (flags.mask & BR_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, UNU_FFP(BIT(port)),
+			   flags.val & BR_FLOOD ? UNU_FFP(BIT(port)) : 0);
+
+	if (flags.mask & BR_MCAST_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
+			   flags.val & BR_MCAST_FLOOD ? UNM_FFP(BIT(port)) : 0);
+
+	if (flags.mask & BR_BCAST_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, BC_FFP(BIT(port)),
+			   flags.val & BR_BCAST_FLOOD ? BC_FFP(BIT(port)) : 0);
+
+	return 0;
+}
+
+static int
+mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
+			struct netlink_ext_ack *extack)
+{
+	struct mt7530_priv *priv = ds->priv;
+
+	mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
+		   mrouter ? UNM_FFP(BIT(port)) : 0);
+
+	return 0;
+}
+
+static int
 mt7530_port_bridge_join(struct dsa_switch *ds, int port,
 			struct net_device *bridge)
 {
@@ -1347,6 +1398,59 @@ err:
 }
 
 static int
+mt7530_port_mdb_add(struct dsa_switch *ds, int port,
+		    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct mt7530_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+	u8 port_mask = 0;
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+
+	mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
+	if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
+		port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
+			    & PORT_MAP_MASK;
+
+	port_mask |= BIT(port);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
+	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
+
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
+static int
+mt7530_port_mdb_del(struct dsa_switch *ds, int port,
+		    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct mt7530_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+	u8 port_mask = 0;
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+
+	mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
+	if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
+		port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
+			    & PORT_MAP_MASK;
+
+	port_mask &= ~BIT(port);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1,
+			 port_mask ? STATIC_ENT : STATIC_EMP);
+	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
+
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
+static int
 mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
 {
 	struct mt7530_dummy_poll p;
@@ -1818,9 +1922,12 @@ mt7530_setup(struct dsa_switch *ds)
 			ret = mt753x_cpu_port_enable(ds, i);
 			if (ret)
 				return ret;
-		} else
+		} else {
 			mt7530_port_disable(ds, i);
 
+			/* Disable learning by default on all user ports */
+			mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+		}
 		/* Enable consistent egress tag */
 		mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
 			   PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
@@ -1982,9 +2089,13 @@ mt7531_setup(struct dsa_switch *ds)
 			ret = mt753x_cpu_port_enable(ds, i);
 			if (ret)
 				return ret;
-		} else
+		} else {
 			mt7530_port_disable(ds, i);
 
+			/* Disable learning by default on all user ports */
+			mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+		}
+
 		/* Enable consistent egress tag */
 		mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
 			   PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
@@ -2706,11 +2817,16 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
 	.port_change_mtu	= mt7530_port_change_mtu,
 	.port_max_mtu		= mt7530_port_max_mtu,
 	.port_stp_state_set	= mt7530_stp_state_set,
+	.port_pre_bridge_flags	= mt7530_port_pre_bridge_flags,
+	.port_bridge_flags	= mt7530_port_bridge_flags,
+	.port_set_mrouter	= mt7530_port_set_mrouter,
 	.port_bridge_join	= mt7530_port_bridge_join,
 	.port_bridge_leave	= mt7530_port_bridge_leave,
 	.port_fdb_add		= mt7530_port_fdb_add,
 	.port_fdb_del		= mt7530_port_fdb_del,
 	.port_fdb_dump		= mt7530_port_fdb_dump,
+	.port_mdb_add		= mt7530_port_mdb_add,
+	.port_mdb_del		= mt7530_port_mdb_del,
 	.port_vlan_filtering	= mt7530_port_vlan_filtering,
 	.port_vlan_add		= mt7530_port_vlan_add,
 	.port_vlan_del		= mt7530_port_vlan_del,
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 64a9bb377e15..ec36ea5dfd57 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -34,6 +34,7 @@ enum mt753x_id {
 /* Registers to mac forward control for unknown frames */
 #define MT7530_MFC			0x10
 #define  BC_FFP(x)			(((x) & 0xff) << 24)
+#define  BC_FFP_MASK			BC_FFP(~0)
 #define  UNM_FFP(x)			(((x) & 0xff) << 16)
 #define  UNM_FFP_MASK			UNM_FFP(~0)
 #define  UNU_FFP(x)			(((x) & 0xff) << 8)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 903d619e08ed..95f07fcd4f85 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -485,12 +485,12 @@ static int mv88e6xxx_serdes_pcs_get_state(struct dsa_switch *ds, int port,
 					  struct phylink_link_state *state)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
-	u8 lane;
+	int lane;
 	int err;
 
 	mv88e6xxx_reg_lock(chip);
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane && chip->info->ops->serdes_pcs_get_state)
+	if (lane >= 0 && chip->info->ops->serdes_pcs_get_state)
 		err = chip->info->ops->serdes_pcs_get_state(chip, port, lane,
 							    state);
 	else
@@ -506,11 +506,11 @@ static int mv88e6xxx_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 				       const unsigned long *advertise)
 {
 	const struct mv88e6xxx_ops *ops = chip->info->ops;
-	u8 lane;
+	int lane;
 
 	if (ops->serdes_pcs_config) {
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			return ops->serdes_pcs_config(chip, port, lane, mode,
 						      interface, advertise);
 	}
@@ -523,14 +523,14 @@ static void mv88e6xxx_serdes_pcs_an_restart(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	const struct mv88e6xxx_ops *ops;
 	int err = 0;
-	u8 lane;
+	int lane;
 
 	ops = chip->info->ops;
 
 	if (ops->serdes_pcs_an_restart) {
 		mv88e6xxx_reg_lock(chip);
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			err = ops->serdes_pcs_an_restart(chip, port, lane);
 		mv88e6xxx_reg_unlock(chip);
 
@@ -544,11 +544,11 @@ static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 					int speed, int duplex)
 {
 	const struct mv88e6xxx_ops *ops = chip->info->ops;
-	u8 lane;
+	int lane;
 
 	if (!phylink_autoneg_inband(mode) && ops->serdes_pcs_link_up) {
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			return ops->serdes_pcs_link_up(chip, port, lane,
 						       speed, duplex);
 	}
@@ -635,6 +635,29 @@ static void mv88e6390x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
 	mv88e6390_phylink_validate(chip, port, mask, state);
 }
 
+static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
+					unsigned long *mask,
+					struct phylink_link_state *state)
+{
+	if (port == 0 || port == 9 || port == 10) {
+		phylink_set(mask, 10000baseT_Full);
+		phylink_set(mask, 10000baseKR_Full);
+		phylink_set(mask, 10000baseCR_Full);
+		phylink_set(mask, 10000baseSR_Full);
+		phylink_set(mask, 10000baseLR_Full);
+		phylink_set(mask, 10000baseLRM_Full);
+		phylink_set(mask, 10000baseER_Full);
+		phylink_set(mask, 5000baseT_Full);
+		phylink_set(mask, 2500baseX_Full);
+		phylink_set(mask, 2500baseT_Full);
+	}
+
+	phylink_set(mask, 1000baseT_Full);
+	phylink_set(mask, 1000baseX_Full);
+
+	mv88e6065_phylink_validate(chip, port, mask, state);
+}
+
 static void mv88e6xxx_validate(struct dsa_switch *ds, int port,
 			       unsigned long *supported,
 			       struct phylink_link_state *state)
@@ -1456,6 +1479,13 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (dsa_to_port(ds, port)->lag_dev)
+		/* Hardware is incapable of fast-aging a LAG through a
+		 * regular ATU move operation. Until we have something
+		 * more fancy in place this is a no-op.
+		 */
+		return;
+
 	mv88e6xxx_reg_lock(chip);
 	err = mv88e6xxx_g1_atu_remove(chip, 0, port, false);
 	mv88e6xxx_reg_unlock(chip);
@@ -1472,13 +1502,54 @@ static int mv88e6xxx_vtu_setup(struct mv88e6xxx_chip *chip)
 	return mv88e6xxx_g1_vtu_flush(chip);
 }
 
-static int mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip,
-				 struct mv88e6xxx_vtu_entry *entry)
+static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
+			     struct mv88e6xxx_vtu_entry *entry)
+{
+	int err;
+
+	if (!chip->info->ops->vtu_getnext)
+		return -EOPNOTSUPP;
+
+	entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip);
+	entry->valid = false;
+
+	err = chip->info->ops->vtu_getnext(chip, entry);
+
+	if (entry->vid != vid)
+		entry->valid = false;
+
+	return err;
+}
+
+static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
+			      int (*cb)(struct mv88e6xxx_chip *chip,
+					const struct mv88e6xxx_vtu_entry *entry,
+					void *priv),
+			      void *priv)
 {
+	struct mv88e6xxx_vtu_entry entry = {
+		.vid = mv88e6xxx_max_vid(chip),
+		.valid = false,
+	};
+	int err;
+
 	if (!chip->info->ops->vtu_getnext)
 		return -EOPNOTSUPP;
 
-	return chip->info->ops->vtu_getnext(chip, entry);
+	do {
+		err = chip->info->ops->vtu_getnext(chip, &entry);
+		if (err)
+			return err;
+
+		if (!entry.valid)
+			break;
+
+		err = cb(chip, &entry, priv);
+		if (err)
+			return err;
+	} while (entry.vid < mv88e6xxx_max_vid(chip));
+
+	return 0;
 }
 
 static int mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
@@ -1490,9 +1561,18 @@ static int mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
 	return chip->info->ops->vtu_loadpurge(chip, entry);
 }
 
+static int mv88e6xxx_fid_map_vlan(struct mv88e6xxx_chip *chip,
+				  const struct mv88e6xxx_vtu_entry *entry,
+				  void *_fid_bitmap)
+{
+	unsigned long *fid_bitmap = _fid_bitmap;
+
+	set_bit(entry->fid, fid_bitmap);
+	return 0;
+}
+
 int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
 {
-	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 	u16 fid;
 
@@ -1508,21 +1588,7 @@ int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
 	}
 
 	/* Set every FID bit used by the VLAN entries */
-	vlan.vid = mv88e6xxx_max_vid(chip);
-	vlan.valid = false;
-
-	do {
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
-		if (err)
-			return err;
-
-		if (!vlan.valid)
-			break;
-
-		set_bit(vlan.fid, fid_bitmap);
-	} while (vlan.vid < mv88e6xxx_max_vid(chip));
-
-	return 0;
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_fid_map_vlan, fid_bitmap);
 }
 
 static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
@@ -1559,19 +1625,13 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
 	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
 		return 0;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
 	if (!vlan.valid)
 		return 0;
 
-	if (vlan.vid != vid)
-		return 0;
-
 	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
 			continue;
@@ -1653,15 +1713,12 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 		if (err)
 			return err;
 	} else {
-		vlan.vid = vid - 1;
-		vlan.valid = false;
-
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
+		err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 		if (err)
 			return err;
 
 		/* switchdev expects -EOPNOTSUPP to honor software VLANs */
-		if (vlan.vid != vid || !vlan.valid)
+		if (!vlan.valid)
 			return -EOPNOTSUPP;
 
 		fid = vlan.fid;
@@ -1911,8 +1968,10 @@ static int mv88e6xxx_set_rxnfc(struct dsa_switch *ds, int port,
 static int mv88e6xxx_port_add_broadcast(struct mv88e6xxx_chip *chip, int port,
 					u16 vid)
 {
-	const char broadcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u8 state = MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC;
+	u8 broadcast[ETH_ALEN];
+
+	eth_broadcast_addr(broadcast);
 
 	return mv88e6xxx_port_db_load_purge(chip, port, broadcast, vid, state);
 }
@@ -1923,6 +1982,19 @@ static int mv88e6xxx_broadcast_setup(struct mv88e6xxx_chip *chip, u16 vid)
 	int err;
 
 	for (port = 0; port < mv88e6xxx_num_ports(chip); port++) {
+		struct dsa_port *dp = dsa_to_port(chip->ds, port);
+		struct net_device *brport;
+
+		if (dsa_is_unused_port(chip->ds, port))
+			continue;
+
+		brport = dsa_port_to_bridge_port(dp);
+		if (brport && !br_port_flag_is_set(brport, BR_BCAST_FLOOD))
+			/* Skip bridged user ports where broadcast
+			 * flooding is disabled.
+			 */
+			continue;
+
 		err = mv88e6xxx_port_add_broadcast(chip, port, vid);
 		if (err)
 			return err;
@@ -1931,6 +2003,53 @@ static int mv88e6xxx_broadcast_setup(struct mv88e6xxx_chip *chip, u16 vid)
 	return 0;
 }
 
+struct mv88e6xxx_port_broadcast_sync_ctx {
+	int port;
+	bool flood;
+};
+
+static int
+mv88e6xxx_port_broadcast_sync_vlan(struct mv88e6xxx_chip *chip,
+				   const struct mv88e6xxx_vtu_entry *vlan,
+				   void *_ctx)
+{
+	struct mv88e6xxx_port_broadcast_sync_ctx *ctx = _ctx;
+	u8 broadcast[ETH_ALEN];
+	u8 state;
+
+	if (ctx->flood)
+		state = MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC;
+	else
+		state = MV88E6XXX_G1_ATU_DATA_STATE_MC_UNUSED;
+
+	eth_broadcast_addr(broadcast);
+
+	return mv88e6xxx_port_db_load_purge(chip, ctx->port, broadcast,
+					    vlan->vid, state);
+}
+
+static int mv88e6xxx_port_broadcast_sync(struct mv88e6xxx_chip *chip, int port,
+					 bool flood)
+{
+	struct mv88e6xxx_port_broadcast_sync_ctx ctx = {
+		.port = port,
+		.flood = flood,
+	};
+	struct mv88e6xxx_vtu_entry vid0 = {
+		.vid = 0,
+	};
+	int err;
+
+	/* Update the port's private database... */
+	err = mv88e6xxx_port_broadcast_sync_vlan(chip, &vid0, &ctx);
+	if (err)
+		return err;
+
+	/* ...and the database for all VLANs. */
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_port_broadcast_sync_vlan,
+				  &ctx);
+}
+
 static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
 				    u16 vid, u8 member, bool warn)
 {
@@ -1938,14 +2057,11 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
 	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
-	if (vlan.vid != vid || !vlan.valid) {
+	if (!vlan.valid) {
 		memset(&vlan, 0, sizeof(vlan));
 
 		err = mv88e6xxx_atu_new(chip, &vlan.fid);
@@ -2041,17 +2157,14 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip,
 	if (!vid)
 		return -EOPNOTSUPP;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
 	/* If the VLAN doesn't exist in hardware or the port isn't a member,
 	 * tell switchdev that this VLAN is likely handled in software.
 	 */
-	if (vlan.vid != vid || !vlan.valid ||
+	if (!vlan.valid ||
 	    vlan.member[port] == MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER)
 		return -EOPNOTSUPP;
 
@@ -2168,10 +2281,30 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 	return err;
 }
 
+struct mv88e6xxx_port_db_dump_vlan_ctx {
+	int port;
+	dsa_fdb_dump_cb_t *cb;
+	void *data;
+};
+
+static int mv88e6xxx_port_db_dump_vlan(struct mv88e6xxx_chip *chip,
+				       const struct mv88e6xxx_vtu_entry *entry,
+				       void *_data)
+{
+	struct mv88e6xxx_port_db_dump_vlan_ctx *ctx = _data;
+
+	return mv88e6xxx_port_db_dump_fid(chip, entry->fid, entry->vid,
+					  ctx->port, ctx->cb, ctx->data);
+}
+
 static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 				  dsa_fdb_dump_cb_t *cb, void *data)
 {
-	struct mv88e6xxx_vtu_entry vlan;
+	struct mv88e6xxx_port_db_dump_vlan_ctx ctx = {
+		.port = port,
+		.cb = cb,
+		.data = data,
+	};
 	u16 fid;
 	int err;
 
@@ -2184,25 +2317,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 	if (err)
 		return err;
 
-	/* Dump VLANs' Filtering Information Databases */
-	vlan.vid = mv88e6xxx_max_vid(chip);
-	vlan.valid = false;
-
-	do {
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
-		if (err)
-			return err;
-
-		if (!vlan.valid)
-			break;
-
-		err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
-						 cb, data);
-		if (err)
-			return err;
-	} while (vlan.vid < mv88e6xxx_max_vid(chip));
-
-	return err;
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_port_db_dump_vlan, &ctx);
 }
 
 static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
@@ -2434,19 +2549,15 @@ static int mv88e6xxx_setup_message_port(struct mv88e6xxx_chip *chip, int port)
 
 static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
 {
-	struct dsa_switch *ds = chip->ds;
-	bool flood;
 	int err;
 
-	/* Upstream ports flood frames with unknown unicast or multicast DA */
-	flood = dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port);
 	if (chip->info->ops->port_set_ucast_flood) {
-		err = chip->info->ops->port_set_ucast_flood(chip, port, flood);
+		err = chip->info->ops->port_set_ucast_flood(chip, port, true);
 		if (err)
 			return err;
 	}
 	if (chip->info->ops->port_set_mcast_flood) {
-		err = chip->info->ops->port_set_mcast_flood(chip, port, flood);
+		err = chip->info->ops->port_set_mcast_flood(chip, port, true);
 		if (err)
 			return err;
 	}
@@ -2460,11 +2571,11 @@ static irqreturn_t mv88e6xxx_serdes_irq_thread_fn(int irq, void *dev_id)
 	struct mv88e6xxx_chip *chip = mvp->chip;
 	irqreturn_t ret = IRQ_NONE;
 	int port = mvp->port;
-	u8 lane;
+	int lane;
 
 	mv88e6xxx_reg_lock(chip);
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane)
+	if (lane >= 0)
 		ret = mv88e6xxx_serdes_irq_status(chip, port, lane);
 	mv88e6xxx_reg_unlock(chip);
 
@@ -2472,7 +2583,7 @@ static irqreturn_t mv88e6xxx_serdes_irq_thread_fn(int irq, void *dev_id)
 }
 
 static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	struct mv88e6xxx_port *dev_id = &chip->ports[port];
 	unsigned int irq;
@@ -2501,7 +2612,7 @@ static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6xxx_serdes_irq_free(struct mv88e6xxx_chip *chip, int port,
-				     u8 lane)
+				     int lane)
 {
 	struct mv88e6xxx_port *dev_id = &chip->ports[port];
 	unsigned int irq = dev_id->serdes_irq;
@@ -2526,11 +2637,11 @@ static int mv88e6xxx_serdes_irq_free(struct mv88e6xxx_chip *chip, int port,
 static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
 				  bool on)
 {
-	u8 lane;
+	int lane;
 	int err;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (!lane)
+	if (lane < 0)
 		return 0;
 
 	if (on) {
@@ -2550,6 +2661,27 @@ static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
 	return err;
 }
 
+static int mv88e6xxx_set_egress_port(struct mv88e6xxx_chip *chip,
+				     enum mv88e6xxx_egress_direction direction,
+				     int port)
+{
+	int err;
+
+	if (!chip->info->ops->set_egress_port)
+		return -EOPNOTSUPP;
+
+	err = chip->info->ops->set_egress_port(chip, direction, port);
+	if (err)
+		return err;
+
+	if (direction == MV88E6XXX_EGRESS_DIR_INGRESS)
+		chip->ingress_dest_port = port;
+	else
+		chip->egress_dest_port = port;
+
+	return 0;
+}
+
 static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
 {
 	struct dsa_switch *ds = chip->ds;
@@ -2572,19 +2704,17 @@ static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
 				return err;
 		}
 
-		if (chip->info->ops->set_egress_port) {
-			err = chip->info->ops->set_egress_port(chip,
+		err = mv88e6xxx_set_egress_port(chip,
 						MV88E6XXX_EGRESS_DIR_INGRESS,
 						upstream_port);
-			if (err)
-				return err;
+		if (err && err != -EOPNOTSUPP)
+			return err;
 
-			err = chip->info->ops->set_egress_port(chip,
+		err = mv88e6xxx_set_egress_port(chip,
 						MV88E6XXX_EGRESS_DIR_EGRESS,
 						upstream_port);
-			if (err)
-				return err;
-		}
+		if (err && err != -EOPNOTSUPP)
+			return err;
 	}
 
 	return 0;
@@ -2670,15 +2800,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 			return err;
 	}
 
-	/* Port Association Vector: when learning source addresses
-	 * of packets, add the address to the address database using
-	 * a port bitmap that has only the bit for this port set and
-	 * the other bits clear.
+	/* Port Association Vector: disable automatic address learning
+	 * on all user ports since they start out in standalone
+	 * mode. When joining a bridge, learning will be configured to
+	 * match the bridge port settings. Enable learning on all
+	 * DSA/CPU ports. NOTE: FROM_CPU frames always bypass the
+	 * learning process.
+	 *
+	 * Disable HoldAt1, IntOnAgeOut, LockedPort, IgnoreWrongData,
+	 * and RefreshLocked. I.e. setup standard automatic learning.
 	 */
-	reg = 1 << port;
-	/* Disable learning for CPU port */
-	if (dsa_is_cpu_port(ds, port))
+	if (dsa_is_user_port(ds, port))
 		reg = 0;
+	else
+		reg = 1 << port;
 
 	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
 				   reg);
@@ -4570,6 +4705,70 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.phylink_validate = mv88e6390x_phylink_validate,
 };
 
+static const struct mv88e6xxx_ops mv88e6393x_ops = {
+	/* MV88E6XXX_FAMILY_6393 */
+	.setup_errata = mv88e6393x_serdes_setup_errata,
+	.irl_init_all = mv88e6390_g2_irl_init_all,
+	.get_eeprom = mv88e6xxx_g2_get_eeprom8,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom8,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+	.port_set_link = mv88e6xxx_port_set_link,
+	.port_sync_link = mv88e6xxx_port_sync_link,
+	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+	.port_set_speed_duplex = mv88e6393x_port_set_speed_duplex,
+	.port_max_speed_mode = mv88e6393x_port_max_speed_mode,
+	.port_tag_remap = mv88e6390_port_tag_remap,
+	.port_set_policy = mv88e6393x_port_set_policy,
+	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
+	.port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
+	.port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
+	.port_set_ether_type = mv88e6393x_port_set_ether_type,
+	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
+	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+	.port_pause_limit = mv88e6390_port_pause_limit,
+	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+	.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+	.port_get_cmode = mv88e6352_port_get_cmode,
+	.port_set_cmode = mv88e6393x_port_set_cmode,
+	.port_setup_message_port = mv88e6xxx_setup_message_port,
+	.port_set_upstream_port = mv88e6393x_port_set_upstream_port,
+	.stats_snapshot = mv88e6390_g1_stats_snapshot,
+	.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+	.stats_get_sset_count = mv88e6320_stats_get_sset_count,
+	.stats_get_strings = mv88e6320_stats_get_strings,
+	.stats_get_stats = mv88e6390_stats_get_stats,
+	/* .set_cpu_port is missing because this family does not support a global
+	 * CPU port, only per port CPU port which is set via
+	 * .port_set_upstream_port method.
+	 */
+	.set_egress_port = mv88e6393x_set_egress_port,
+	.watchdog_ops = &mv88e6390_watchdog_ops,
+	.mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
+	.reset = mv88e6352_g1_reset,
+	.rmu_disable = mv88e6390_g1_rmu_disable,
+	.atu_get_hash = mv88e6165_g1_atu_get_hash,
+	.atu_set_hash = mv88e6165_g1_atu_set_hash,
+	.vtu_getnext = mv88e6390_g1_vtu_getnext,
+	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
+	.serdes_power = mv88e6393x_serdes_power,
+	.serdes_get_lane = mv88e6393x_serdes_get_lane,
+	.serdes_pcs_get_state = mv88e6393x_serdes_pcs_get_state,
+	.serdes_pcs_config = mv88e6390_serdes_pcs_config,
+	.serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
+	.serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
+	.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
+	.serdes_irq_enable = mv88e6393x_serdes_irq_enable,
+	.serdes_irq_status = mv88e6393x_serdes_irq_status,
+	/* TODO: serdes stats */
+	.gpio_ops = &mv88e6352_gpio_ops,
+	.avb_ops = &mv88e6390_avb_ops,
+	.ptp_ops = &mv88e6352_ptp_ops,
+	.phylink_validate = mv88e6393x_phylink_validate,
+};
+
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 	[MV88E6085] = {
 		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6085,
@@ -4941,6 +5140,52 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ops = &mv88e6191_ops,
 	},
 
+	[MV88E6191X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6191X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6191X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
+
+	[MV88E6193X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6193X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6193X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
+
 	[MV88E6220] = {
 		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6220,
 		.family = MV88E6XXX_FAMILY_6250,
@@ -5231,6 +5476,29 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ptp_support = true,
 		.ops = &mv88e6390x_ops,
 	},
+
+	[MV88E6393X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6393X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6393X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
 };
 
 static const struct mv88e6xxx_info *mv88e6xxx_lookup_info(unsigned int prod_num)
@@ -5338,9 +5606,6 @@ static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port,
 	int i;
 	int err;
 
-	if (!chip->info->ops->set_egress_port)
-		return -EOPNOTSUPP;
-
 	mutex_lock(&chip->reg_lock);
 	if ((ingress ? chip->ingress_dest_port : chip->egress_dest_port) !=
 	    mirror->to_local_port) {
@@ -5355,9 +5620,8 @@ static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port,
 			goto out;
 		}
 
-		err = chip->info->ops->set_egress_port(chip,
-						       direction,
-						       mirror->to_local_port);
+		err = mv88e6xxx_set_egress_port(chip, direction,
+						mirror->to_local_port);
 		if (err)
 			goto out;
 	}
@@ -5390,10 +5654,8 @@ static void mv88e6xxx_port_mirror_del(struct dsa_switch *ds, int port,
 
 	/* Reset egress port when no other mirror is active */
 	if (!other_mirrors) {
-		if (chip->info->ops->set_egress_port(chip,
-						     direction,
-						     dsa_upstream_port(ds,
-								       port)))
+		if (mv88e6xxx_set_egress_port(chip, direction,
+					      dsa_upstream_port(ds, port)))
 			dev_err(ds->dev, "failed to set egress port\n");
 	}
 
@@ -5407,7 +5669,8 @@ static int mv88e6xxx_port_pre_bridge_flags(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_chip *chip = ds->priv;
 	const struct mv88e6xxx_ops *ops;
 
-	if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD))
+	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+			   BR_BCAST_FLOOD))
 		return -EINVAL;
 
 	ops = chip->info->ops;
@@ -5426,10 +5689,23 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 				       struct netlink_ext_ack *extack)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	bool do_fast_age = false;
 	int err = -EOPNOTSUPP;
 
 	mv88e6xxx_reg_lock(chip);
 
+	if (flags.mask & BR_LEARNING) {
+		bool learning = !!(flags.val & BR_LEARNING);
+		u16 pav = learning ? (1 << port) : 0;
+
+		err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
+		if (err)
+			goto out;
+
+		if (!learning)
+			do_fast_age = true;
+	}
+
 	if (flags.mask & BR_FLOOD) {
 		bool unicast = !!(flags.val & BR_FLOOD);
 
@@ -5448,9 +5724,20 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 			goto out;
 	}
 
+	if (flags.mask & BR_BCAST_FLOOD) {
+		bool broadcast = !!(flags.val & BR_BCAST_FLOOD);
+
+		err = mv88e6xxx_port_broadcast_sync(chip, port, broadcast);
+		if (err)
+			goto out;
+	}
+
 out:
 	mv88e6xxx_reg_unlock(chip);
 
+	if (do_fast_age)
+		mv88e6xxx_port_fast_age(ds, port);
+
 	return err;
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index a57c8886f3ac..bce6e0dc8535 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -63,6 +63,8 @@ enum mv88e6xxx_model {
 	MV88E6190,
 	MV88E6190X,
 	MV88E6191,
+	MV88E6191X,
+	MV88E6193X,
 	MV88E6220,
 	MV88E6240,
 	MV88E6250,
@@ -75,6 +77,7 @@ enum mv88e6xxx_model {
 	MV88E6352,
 	MV88E6390,
 	MV88E6390X,
+	MV88E6393X,
 };
 
 enum mv88e6xxx_family {
@@ -90,6 +93,7 @@ enum mv88e6xxx_family {
 	MV88E6XXX_FAMILY_6351,	/* 6171 6175 6350 6351 */
 	MV88E6XXX_FAMILY_6352,	/* 6172 6176 6240 6352 */
 	MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
+	MV88E6XXX_FAMILY_6393,	/* 6191X 6193X 6393X */
 };
 
 struct mv88e6xxx_ops;
@@ -513,30 +517,30 @@ struct mv88e6xxx_ops {
 	int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
 
 	/* Power on/off a SERDES interface */
-	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, u8 lane,
+	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, int lane,
 			    bool up);
 
 	/* SERDES lane mapping */
-	u8 (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
+	int (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
 
 	int (*serdes_pcs_get_state)(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane, struct phylink_link_state *state);
+				    int lane, struct phylink_link_state *state);
 	int (*serdes_pcs_config)(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, unsigned int mode,
+				 int lane, unsigned int mode,
 				 phy_interface_t interface,
 				 const unsigned long *advertise);
 	int (*serdes_pcs_an_restart)(struct mv88e6xxx_chip *chip, int port,
-				     u8 lane);
+				     int lane);
 	int (*serdes_pcs_link_up)(struct mv88e6xxx_chip *chip, int port,
-				  u8 lane, int speed, int duplex);
+				  int lane, int speed, int duplex);
 
 	/* SERDES interrupt handling */
 	unsigned int (*serdes_irq_mapping)(struct mv88e6xxx_chip *chip,
 					   int port);
-	int (*serdes_irq_enable)(struct mv88e6xxx_chip *chip, int port, u8 lane,
+	int (*serdes_irq_enable)(struct mv88e6xxx_chip *chip, int port, int lane,
 				 bool enable);
 	irqreturn_t (*serdes_irq_status)(struct mv88e6xxx_chip *chip, int port,
-					 u8 lane);
+					 int lane);
 
 	/* Statistics from the SERDES interface */
 	int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 33d443a37efc..815b0f681d69 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -315,7 +315,6 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 				 enum mv88e6xxx_egress_direction direction,
 				 int port)
 {
-	int *dest_port_chip;
 	u16 reg;
 	int err;
 
@@ -325,13 +324,11 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 
 	switch (direction) {
 	case MV88E6XXX_EGRESS_DIR_INGRESS:
-		dest_port_chip = &chip->ingress_dest_port;
 		reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
 		reg |= port <<
 		       __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK);
 		break;
 	case MV88E6XXX_EGRESS_DIR_EGRESS:
-		dest_port_chip = &chip->egress_dest_port;
 		reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
 		reg |= port <<
 		       __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK);
@@ -340,11 +337,7 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 		return -EINVAL;
 	}
 
-	err = mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg);
-	if (!err)
-		*dest_port_chip = port;
-
-	return err;
+	return mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg);
 }
 
 /* Older generations also call this the ARP destination. It has been
@@ -380,28 +373,20 @@ int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 				 enum mv88e6xxx_egress_direction direction,
 				 int port)
 {
-	int *dest_port_chip;
 	u16 ptr;
-	int err;
 
 	switch (direction) {
 	case MV88E6XXX_EGRESS_DIR_INGRESS:
-		dest_port_chip = &chip->ingress_dest_port;
 		ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST;
 		break;
 	case MV88E6XXX_EGRESS_DIR_EGRESS:
-		dest_port_chip = &chip->egress_dest_port;
 		ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	err = mv88e6390_g1_monitor_write(chip, ptr, port);
-	if (!err)
-		*dest_port_chip = port;
-
-	return err;
+	return mv88e6390_g1_monitor_write(chip, ptr, port);
 }
 
 int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port)
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 7c396964d0b2..4f3dbb015f77 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -22,6 +22,7 @@
 #define MV88E6185_G1_STS_PPU_STATE_DISABLED		0x8000
 #define MV88E6185_G1_STS_PPU_STATE_POLLING		0xc000
 #define MV88E6XXX_G1_STS_INIT_READY			0x0800
+#define MV88E6393X_G1_STS_IRQ_DEVICE_2			9
 #define MV88E6XXX_G1_STS_IRQ_AVB			8
 #define MV88E6XXX_G1_STS_IRQ_DEVICE			7
 #define MV88E6XXX_G1_STS_IRQ_STATS			6
@@ -59,6 +60,7 @@
 #define MV88E6185_G1_CTL1_SCHED_PRIO		0x0800
 #define MV88E6185_G1_CTL1_MAX_FRAME_1632	0x0400
 #define MV88E6185_G1_CTL1_RELOAD_EEPROM		0x0200
+#define MV88E6393X_G1_CTL1_DEVICE2_EN		0x0200
 #define MV88E6XXX_G1_CTL1_DEVICE_EN		0x0080
 #define MV88E6XXX_G1_CTL1_STATS_DONE_EN		0x0040
 #define MV88E6XXX_G1_CTL1_VTU_PROBLEM_EN	0x0020
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 4127f82275ad..c78769cdbb59 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -38,9 +38,15 @@
 /* Offset 0x02: MGMT Enable Register 2x */
 #define MV88E6XXX_G2_MGMT_EN_2X		0x02
 
+/* Offset 0x02: MAC LINK change IRQ Register for MV88E6393X */
+#define MV88E6393X_G2_MACLINK_INT_SRC		0x02
+
 /* Offset 0x03: MGMT Enable Register 0x */
 #define MV88E6XXX_G2_MGMT_EN_0X		0x03
 
+/* Offset 0x03: MAC LINK change IRQ Mask Register for MV88E6393X */
+#define MV88E6393X_G2_MACLINK_INT_MASK		0x03
+
 /* Offset 0x04: Flow Control Delay Register */
 #define MV88E6XXX_G2_FLOW_CTL	0x04
 
@@ -52,6 +58,8 @@
 #define MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI	0x0080
 #define MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU		0x0008
 
+#define MV88E6393X_G2_EGRESS_MONITOR_DEST		0x05
+
 /* Offset 0x06: Device Mapping Table Register */
 #define MV88E6XXX_G2_DEVICE_MAPPING		0x06
 #define MV88E6XXX_G2_DEVICE_MAPPING_UPDATE	0x8000
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index 7c2c67405322..eda710062933 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -42,7 +42,7 @@ static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
 }
 
 /**
- * mv88e6xxx_g2_scratch_gpio_get_bit - get a bit
+ * mv88e6xxx_g2_scratch_get_bit - get a bit
  * @chip: chip private data
  * @base_reg: base of scratch bits
  * @offset: index of bit within the register
@@ -67,7 +67,7 @@ static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
 }
 
 /**
- * mv88e6xxx_g2_scratch_gpio_set_bit - set (or clear) a bit
+ * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
  * @chip: chip private data
  * @base_reg: base of scratch bits
  * @offset: index of bit within the register
@@ -240,7 +240,7 @@ const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
 };
 
 /**
- * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * mv88e6xxx_g2_scratch_gpio_set_smi - set gpio muxing for external smi
  * @chip: chip private data
  * @external: set mux for external smi, or free for gpio usage
  *
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 4561f289ab76..f77e2ee64a60 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -14,6 +14,7 @@
 #include <linux/phylink.h>
 
 #include "chip.h"
+#include "global2.h"
 #include "port.h"
 #include "serdes.h"
 
@@ -25,6 +26,14 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 	return mv88e6xxx_read(chip, addr, reg, val);
 }
 
+int mv88e6xxx_port_wait_bit(struct mv88e6xxx_chip *chip, int port, int reg,
+			    int bit, int val)
+{
+	int addr = chip->info->port_base_addr + port;
+
+	return mv88e6xxx_wait_bit(chip, addr, reg, bit, val);
+}
+
 int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
 			 u16 val)
 {
@@ -426,11 +435,111 @@ phy_interface_t mv88e6390x_port_max_speed_mode(int port)
 	return PHY_INTERFACE_MODE_NA;
 }
 
+/* Support 10, 100, 200, 1000, 2500, 5000, 10000 Mbps (e.g. 88E6393X)
+ * Function mv88e6xxx_port_set_speed_duplex() can't be used as the register
+ * values for speeds 2500 & 5000 conflict.
+ */
+int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
+				     int speed, int duplex)
+{
+	u16 reg, ctrl;
+	int err;
+
+	if (speed == SPEED_MAX)
+		speed = (port > 0 && port < 9) ? 1000 : 10000;
+
+	if (speed == 200 && port != 0)
+		return -EOPNOTSUPP;
+
+	if (speed >= 2500 && port > 0 && port < 9)
+		return -EOPNOTSUPP;
+
+	switch (speed) {
+	case 10:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_10;
+		break;
+	case 100:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_100;
+		break;
+	case 200:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_100 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 1000:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000;
+		break;
+	case 2500:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 5000:
+		ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 10000:
+	case SPEED_UNFORCED:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_UNFORCED;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (duplex) {
+	case DUPLEX_HALF:
+		ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX;
+		break;
+	case DUPLEX_FULL:
+		ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX |
+			MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL;
+		break;
+	case DUPLEX_UNFORCED:
+		/* normal duplex detection */
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, &reg);
+	if (err)
+		return err;
+
+	reg &= ~(MV88E6XXX_PORT_MAC_CTL_SPEED_MASK |
+		 MV88E6390_PORT_MAC_CTL_ALTSPEED |
+		 MV88E6390_PORT_MAC_CTL_FORCE_SPEED);
+
+	if (speed != SPEED_UNFORCED)
+		reg |= MV88E6390_PORT_MAC_CTL_FORCE_SPEED;
+
+	reg |= ctrl;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg);
+	if (err)
+		return err;
+
+	if (speed)
+		dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed);
+	else
+		dev_dbg(chip->dev, "p%d: Speed unforced\n", port);
+	dev_dbg(chip->dev, "p%d: %s %s duplex\n", port,
+		reg & MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX ? "Force" : "Unforce",
+		reg & MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL ? "full" : "half");
+
+	return 0;
+}
+
+phy_interface_t mv88e6393x_port_max_speed_mode(int port)
+{
+	if (port == 0 || port == 9 || port == 10)
+		return PHY_INTERFACE_MODE_10GBASER;
+
+	return PHY_INTERFACE_MODE_NA;
+}
+
 static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 				    phy_interface_t mode, bool force)
 {
-	u8 lane;
 	u16 cmode;
+	int lane;
 	u16 reg;
 	int err;
 
@@ -450,6 +559,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	case PHY_INTERFACE_MODE_2500BASEX:
 		cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX;
 		break;
+	case PHY_INTERFACE_MODE_5GBASER:
+		cmode = MV88E6393X_PORT_STS_CMODE_5GBASER;
+		break;
 	case PHY_INTERFACE_MODE_XGMII:
 	case PHY_INTERFACE_MODE_XAUI:
 		cmode = MV88E6XXX_PORT_STS_CMODE_XAUI;
@@ -457,6 +569,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	case PHY_INTERFACE_MODE_RXAUI:
 		cmode = MV88E6XXX_PORT_STS_CMODE_RXAUI;
 		break;
+	case PHY_INTERFACE_MODE_10GBASER:
+		cmode = MV88E6393X_PORT_STS_CMODE_10GBASER;
+		break;
 	default:
 		cmode = 0;
 	}
@@ -466,7 +581,7 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		return 0;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane) {
+	if (lane >= 0) {
 		if (chip->ports[port].serdes_irq) {
 			err = mv88e6xxx_serdes_irq_disable(chip, port, lane);
 			if (err)
@@ -495,8 +610,8 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		chip->ports[port].cmode = cmode;
 
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (!lane)
-			return -ENODEV;
+		if (lane < 0)
+			return lane;
 
 		err = mv88e6xxx_serdes_power_up(chip, port, lane);
 		if (err)
@@ -541,6 +656,29 @@ int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6xxx_port_set_cmode(chip, port, mode, false);
 }
 
+int mv88e6393x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
+			      phy_interface_t mode)
+{
+	int err;
+	u16 reg;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	/* mv88e6393x errata 4.5: EEE should be disabled on SERDES ports */
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, &reg);
+	if (err)
+		return err;
+
+	reg &= ~MV88E6XXX_PORT_MAC_CTL_EEE;
+	reg |= MV88E6XXX_PORT_MAC_CTL_FORCE_EEE;
+	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg);
+	if (err)
+		return err;
+
+	return mv88e6xxx_port_set_cmode(chip, port, mode, false);
+}
+
 static int mv88e6341_port_set_cmode_writable(struct mv88e6xxx_chip *chip,
 					     int port)
 {
@@ -1171,6 +1309,27 @@ int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port)
 				    0x0001);
 }
 
+/* Offset 0x0B: Port Association Vector */
+
+int mv88e6xxx_port_set_assoc_vector(struct mv88e6xxx_chip *chip, int port,
+				    u16 pav)
+{
+	u16 reg, mask;
+	int err;
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
+				  &reg);
+	if (err)
+		return err;
+
+	mask = mv88e6xxx_port_mask(chip);
+	reg &= ~mask;
+	reg |= pav & mask;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
+				    reg);
+}
+
 /* Offset 0x0C: Port ATU Control */
 
 int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port)
@@ -1185,6 +1344,156 @@ int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port)
 	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, 0);
 }
 
+/* Offset 0x0E: Policy & MGMT Control Register for FAMILY 6191X 6193X 6393X */
+
+static int mv88e6393x_port_policy_read(struct mv88e6xxx_chip *chip, int port,
+				       u16 pointer, u8 *data)
+{
+	u16 reg;
+	int err;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				   pointer);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				  &reg);
+	if (err)
+		return err;
+
+	*data = reg;
+
+	return 0;
+}
+
+static int mv88e6393x_port_policy_write(struct mv88e6xxx_chip *chip, int port,
+					u16 pointer, u8 data)
+{
+	u16 reg;
+
+	reg = MV88E6393X_PORT_POLICY_MGMT_CTL_UPDATE | pointer | data;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				    reg);
+}
+
+static int mv88e6393x_port_policy_write_all(struct mv88e6xxx_chip *chip,
+					    u16 pointer, u8 data)
+{
+	int err, port;
+
+	for (port = 0; port < mv88e6xxx_num_ports(chip); port++) {
+		if (dsa_is_unused_port(chip->ds, port))
+			continue;
+
+		err = mv88e6393x_port_policy_write(chip, port, pointer, data);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int mv88e6393x_set_egress_port(struct mv88e6xxx_chip *chip,
+			       enum mv88e6xxx_egress_direction direction,
+			       int port)
+{
+	u16 ptr;
+	int err;
+
+	switch (direction) {
+	case MV88E6XXX_EGRESS_DIR_INGRESS:
+		ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_INGRESS_DEST;
+		err = mv88e6393x_port_policy_write_all(chip, ptr, port);
+		if (err)
+			return err;
+		break;
+	case MV88E6XXX_EGRESS_DIR_EGRESS:
+		ptr = MV88E6393X_G2_EGRESS_MONITOR_DEST;
+		err = mv88e6xxx_g2_write(chip, ptr, port);
+		if (err)
+			return err;
+		break;
+	}
+
+	return 0;
+}
+
+int mv88e6393x_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
+				      int upstream_port)
+{
+	u16 ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_CPU_DEST;
+	u8 data = MV88E6393X_PORT_POLICY_MGMT_CTL_CPU_DEST_MGMTPRI |
+		  upstream_port;
+
+	return mv88e6393x_port_policy_write(chip, port, ptr, data);
+}
+
+int mv88e6393x_port_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+	u16 ptr;
+	int err;
+
+	/* Consider the frames with reserved multicast destination
+	 * addresses matching 01:80:c2:00:00:00 and
+	 * 01:80:c2:00:00:02 as MGMT.
+	 */
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XLO;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XHI;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XLO;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XHI;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* Offset 0x10 & 0x11: EPC */
+
+static int mv88e6393x_port_epc_wait_ready(struct mv88e6xxx_chip *chip, int port)
+{
+	int bit = __bf_shf(MV88E6393X_PORT_EPC_CMD_BUSY);
+
+	return mv88e6xxx_port_wait_bit(chip, port, MV88E6393X_PORT_EPC_CMD, bit, 0);
+}
+
+/* Port Ether type for 6393X family */
+
+int mv88e6393x_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
+				   u16 etype)
+{
+	u16 val;
+	int err;
+
+	err = mv88e6393x_port_epc_wait_ready(chip, port);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_EPC_DATA, etype);
+	if (err)
+		return err;
+
+	val = MV88E6393X_PORT_EPC_CMD_BUSY |
+	      MV88E6393X_PORT_EPC_CMD_WRITE |
+	      MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_EPC_CMD, val);
+}
+
 /* Offset 0x0f: Port Ether type */
 
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
@@ -1259,46 +1568,43 @@ int mv88e6390_port_tag_remap(struct mv88e6xxx_chip *chip, int port)
 
 /* Offset 0x0E: Policy Control Register */
 
-int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
-			      enum mv88e6xxx_policy_mapping mapping,
-			      enum mv88e6xxx_policy_action action)
+static int
+mv88e6xxx_port_policy_mapping_get_pos(enum mv88e6xxx_policy_mapping mapping,
+				      enum mv88e6xxx_policy_action action,
+				      u16 *mask, u16 *val, int *shift)
 {
-	u16 reg, mask, val;
-	int shift;
-	int err;
-
 	switch (mapping) {
 	case MV88E6XXX_POLICY_MAPPING_DA:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_DA_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_DA_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_DA_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_DA_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_SA:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_SA_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_SA_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_SA_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_SA_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_VTU:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VTU_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_VTU_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VTU_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_VTU_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_ETYPE:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_PPPOE:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_VBAS:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_OPT82:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_UDP:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_UDP_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_UDP_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_UDP_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_UDP_MASK;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -1306,21 +1612,37 @@ int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 
 	switch (action) {
 	case MV88E6XXX_POLICY_ACTION_NORMAL:
-		val = MV88E6XXX_PORT_POLICY_CTL_NORMAL;
+		*val = MV88E6XXX_PORT_POLICY_CTL_NORMAL;
 		break;
 	case MV88E6XXX_POLICY_ACTION_MIRROR:
-		val = MV88E6XXX_PORT_POLICY_CTL_MIRROR;
+		*val = MV88E6XXX_PORT_POLICY_CTL_MIRROR;
 		break;
 	case MV88E6XXX_POLICY_ACTION_TRAP:
-		val = MV88E6XXX_PORT_POLICY_CTL_TRAP;
+		*val = MV88E6XXX_PORT_POLICY_CTL_TRAP;
 		break;
 	case MV88E6XXX_POLICY_ACTION_DISCARD:
-		val = MV88E6XXX_PORT_POLICY_CTL_DISCARD;
+		*val = MV88E6XXX_PORT_POLICY_CTL_DISCARD;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	return 0;
+}
+
+int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			      enum mv88e6xxx_policy_mapping mapping,
+			      enum mv88e6xxx_policy_action action)
+{
+	u16 reg, mask, val;
+	int shift;
+	int err;
+
+	err = mv88e6xxx_port_policy_mapping_get_pos(mapping, action, &mask,
+						    &val, &shift);
+	if (err)
+		return err;
+
 	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_POLICY_CTL, &reg);
 	if (err)
 		return err;
@@ -1330,3 +1652,37 @@ int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 
 	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_POLICY_CTL, reg);
 }
+
+int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			       enum mv88e6xxx_policy_mapping mapping,
+			       enum mv88e6xxx_policy_action action)
+{
+	u16 mask, val;
+	int shift;
+	int err;
+	u16 ptr;
+	u8 reg;
+
+	err = mv88e6xxx_port_policy_mapping_get_pos(mapping, action, &mask,
+						    &val, &shift);
+	if (err)
+		return err;
+
+	/* The 16-bit Port Policy CTL register from older chips is on 6393x
+	 * changed to Port Policy MGMT CTL, which can access more data, but
+	 * indirectly. The original 16-bit value is divided into two 8-bit
+	 * registers.
+	 */
+	ptr = shift / 8;
+	shift %= 8;
+	mask >>= ptr * 8;
+
+	err = mv88e6393x_port_policy_read(chip, port, ptr, &reg);
+	if (err)
+		return err;
+
+	reg &= ~mask;
+	reg |= (val << shift) & mask;
+
+	return mv88e6393x_port_policy_write(chip, port, ptr, reg);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index e6d0eaa6aa1d..b10e5aebacf6 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -49,6 +49,9 @@
 #define MV88E6XXX_PORT_STS_CMODE_2500BASEX	0x000b
 #define MV88E6XXX_PORT_STS_CMODE_XAUI		0x000c
 #define MV88E6XXX_PORT_STS_CMODE_RXAUI		0x000d
+#define MV88E6393X_PORT_STS_CMODE_5GBASER	0x000c
+#define MV88E6393X_PORT_STS_CMODE_10GBASER	0x000d
+#define MV88E6393X_PORT_STS_CMODE_USXGMII	0x000e
 #define MV88E6185_PORT_STS_CDUPLEX		0x0008
 #define MV88E6185_PORT_STS_CMODE_MASK		0x0007
 #define MV88E6185_PORT_STS_CMODE_GMII_FD	0x0000
@@ -68,6 +71,8 @@
 #define MV88E6390_PORT_MAC_CTL_FORCE_SPEED		0x2000
 #define MV88E6390_PORT_MAC_CTL_ALTSPEED			0x1000
 #define MV88E6352_PORT_MAC_CTL_200BASE			0x1000
+#define MV88E6XXX_PORT_MAC_CTL_EEE			0x0200
+#define MV88E6XXX_PORT_MAC_CTL_FORCE_EEE		0x0100
 #define MV88E6185_PORT_MAC_CTL_AN_EN			0x0400
 #define MV88E6185_PORT_MAC_CTL_AN_RESTART		0x0200
 #define MV88E6185_PORT_MAC_CTL_AN_DONE			0x0100
@@ -117,6 +122,8 @@
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6176	0x1760
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6190	0x1900
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6191	0x1910
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6191X	0x1920
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6193X	0x1930
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6185	0x1a70
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6220	0x2200
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6240	0x2400
@@ -129,6 +136,7 @@
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6350	0x3710
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6351	0x3750
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6390	0x3900
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6393X	0x3930
 #define MV88E6XXX_PORT_SWITCH_ID_REV_MASK	0x000f
 
 /* Offset 0x04: Port Control Register */
@@ -236,6 +244,19 @@
 #define MV88E6XXX_PORT_POLICY_CTL_TRAP		0x0002
 #define MV88E6XXX_PORT_POLICY_CTL_DISCARD	0x0003
 
+/* Offset 0x0E: Policy & MGMT Control Register (FAMILY_6393X) */
+#define MV88E6393X_PORT_POLICY_MGMT_CTL				0x0e
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_UPDATE			0x8000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_MASK		0x3f00
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_DATA_MASK		0x00ff
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XLO	0x2000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XHI	0x2100
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XLO	0x2400
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XHI	0x2500
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_INGRESS_DEST	0x3000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_CPU_DEST		0x3800
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_CPU_DEST_MGMTPRI	0x00e0
+
 /* Offset 0x0F: Port Special Ether Type */
 #define MV88E6XXX_PORT_ETH_TYPE		0x0f
 #define MV88E6XXX_PORT_ETH_TYPE_DEFAULT	0x9100
@@ -243,6 +264,15 @@
 /* Offset 0x10: InDiscards Low Counter */
 #define MV88E6XXX_PORT_IN_DISCARD_LO	0x10
 
+/* Offset 0x10: Extended Port Control Command */
+#define MV88E6393X_PORT_EPC_CMD		0x10
+#define MV88E6393X_PORT_EPC_CMD_BUSY	0x8000
+#define MV88E6393X_PORT_EPC_CMD_WRITE	0x0300
+#define MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE	0x02
+
+/* Offset 0x11: Extended Port Control Data */
+#define MV88E6393X_PORT_EPC_DATA	0x11
+
 /* Offset 0x11: InDiscards High Counter */
 #define MV88E6XXX_PORT_IN_DISCARD_HI	0x11
 
@@ -288,6 +318,8 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 			u16 *val);
 int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
 			 u16 val);
+int mv88e6xxx_port_wait_bit(struct mv88e6xxx_chip *chip, int port, int reg,
+			    int bit, int val);
 
 int mv88e6185_port_set_pause(struct mv88e6xxx_chip *chip, int port,
 			     int pause);
@@ -315,10 +347,13 @@ int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
 				    int speed, int duplex);
 int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
 				     int speed, int duplex);
+int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
+				     int speed, int duplex);
 
 phy_interface_t mv88e6341_port_max_speed_mode(int port);
 phy_interface_t mv88e6390_port_max_speed_mode(int port);
 phy_interface_t mv88e6390x_port_max_speed_mode(int port);
+phy_interface_t mv88e6393x_port_max_speed_mode(int port);
 
 int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state);
 
@@ -351,8 +386,19 @@ int mv88e6352_port_set_mcast_flood(struct mv88e6xxx_chip *chip, int port,
 int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 			      enum mv88e6xxx_policy_mapping mapping,
 			      enum mv88e6xxx_policy_action action);
+int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			       enum mv88e6xxx_policy_mapping mapping,
+			       enum mv88e6xxx_policy_action action);
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
 				  u16 etype);
+int mv88e6393x_set_egress_port(struct mv88e6xxx_chip *chip,
+			       enum mv88e6xxx_egress_direction direction,
+			       int port);
+int mv88e6393x_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
+				      int upstream_port);
+int mv88e6393x_port_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+int mv88e6393x_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
+				   u16 etype);
 int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port,
 				    bool message_port);
 int mv88e6xxx_port_set_trunk(struct mv88e6xxx_chip *chip, int port,
@@ -361,6 +407,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
 				  size_t size);
 int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
 int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_port_set_assoc_vector(struct mv88e6xxx_chip *chip, int port,
+				    u16 pav);
 int mv88e6097_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,
 			       u8 out);
 int mv88e6390_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,
@@ -371,6 +419,8 @@ int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			     phy_interface_t mode);
 int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			      phy_interface_t mode);
+int mv88e6393x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
+			      phy_interface_t mode);
 int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 3195936dc5be..470856bcd2f3 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -95,7 +95,7 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
 	return 0;
 }
 
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	u16 val, new_val;
@@ -117,7 +117,7 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise)
 {
@@ -166,7 +166,7 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	u16 lpa, status;
 	int err;
@@ -187,7 +187,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane)
+				    int lane)
 {
 	u16 bmcr;
 	int err;
@@ -200,7 +200,7 @@ int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex)
+				 int lane, int speed, int duplex)
 {
 	u16 val, bmcr;
 	int err;
@@ -230,10 +230,10 @@ int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6352_serdes_write(chip, MII_BMCR, bmcr);
 }
 
-u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASEX) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX) ||
@@ -245,7 +245,7 @@ u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 
 static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6xxx_serdes_get_lane(chip, port))
+	if (mv88e6xxx_serdes_get_lane(chip, port) >= 0)
 		return true;
 
 	return false;
@@ -354,7 +354,7 @@ static void mv88e6352_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
 }
 
 irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	irqreturn_t ret = IRQ_NONE;
 	u16 status;
@@ -372,7 +372,7 @@ irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 	return ret;
 }
 
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u16 val = 0;
@@ -413,10 +413,10 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 	}
 }
 
-u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 5:
@@ -430,7 +430,7 @@ u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	/* The serdes power can't be controlled on this switch chip but we need
@@ -440,7 +440,7 @@ int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 	return 0;
 }
 
-u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	/* There are no configurable serdes lanes on this switch chip but we
 	 * need to return non-zero so that callers of
@@ -456,7 +456,7 @@ u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 }
 
 int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	int err;
 	u16 status;
@@ -492,7 +492,7 @@ int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 	return 0;
 }
 
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -525,7 +525,7 @@ static void mv88e6097_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
 }
 
 irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	u8 cmode = chip->ports[port].cmode;
 
@@ -539,10 +539,10 @@ irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 	return IRQ_NONE;
 }
 
-u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 9:
@@ -562,12 +562,12 @@ u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
-u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode_port = chip->ports[port].cmode;
 	u8 cmode_port10 = chip->ports[10].cmode;
 	u8 cmode_port9 = chip->ports[9].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 2:
@@ -637,8 +637,29 @@ u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
+/* Only Ports 0, 9 and 10 have SERDES lanes. Return the SERDES lane address
+ * a port is using else Returns -ENODEV.
+ */
+int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+{
+	u8 cmode = chip->ports[port].cmode;
+	int lane = -ENODEV;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
+	    cmode == MV88E6393X_PORT_STS_CMODE_5GBASER ||
+	    cmode == MV88E6393X_PORT_STS_CMODE_10GBASER)
+		lane = port;
+
+	return lane;
+}
+
 /* Set power up/down for 10GBASE-R and 10GBASE-X4/X2 */
-static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, u8 lane,
+static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, int lane,
 				      bool up)
 {
 	u16 val, new_val;
@@ -665,7 +686,7 @@ static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, u8 lane,
 }
 
 /* Set power up/down for SGMII and 1000Base-X */
-static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, u8 lane,
+static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, int lane,
 					bool up)
 {
 	u16 val, new_val;
@@ -701,7 +722,7 @@ static struct mv88e6390_serdes_hw_stat mv88e6390_serdes_hw_stats[] = {
 
 int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6390_serdes_get_lane(chip, port) == 0)
+	if (mv88e6390_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
@@ -713,7 +734,7 @@ int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip,
 	struct mv88e6390_serdes_hw_stat *stat;
 	int i;
 
-	if (mv88e6390_serdes_get_lane(chip, port) == 0)
+	if (mv88e6390_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
@@ -750,7 +771,7 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 	int i;
 
 	lane = mv88e6390_serdes_get_lane(chip, port);
-	if (lane == 0)
+	if (lane < 0)
 		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
@@ -761,7 +782,7 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 	return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
 }
 
-static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, u8 lane)
+static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, int lane)
 {
 	u16 reg;
 	int err;
@@ -776,7 +797,7 @@ static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, u8 lane)
 				      MV88E6390_PG_CONTROL, reg);
 }
 
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -801,7 +822,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise)
 {
@@ -860,7 +881,7 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
-	int port, u8 lane, struct phylink_link_state *state)
+	int port, int lane, struct phylink_link_state *state)
 {
 	u16 lpa, status;
 	int err;
@@ -883,7 +904,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
 }
 
 static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
-	int port, u8 lane, struct phylink_link_state *state)
+	int port, int lane, struct phylink_link_state *state)
 {
 	u16 status;
 	int err;
@@ -902,8 +923,32 @@ static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
 	return 0;
 }
 
+static int mv88e6393x_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
+					       int port, int lane,
+					       struct phylink_link_state *state)
+{
+	u16 status;
+	int err;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6390_10G_STAT1, &status);
+	if (err)
+		return err;
+
+	state->link = !!(status & MDIO_STAT1_LSTATUS);
+	if (state->link) {
+		if (state->interface == PHY_INTERFACE_MODE_5GBASER)
+			state->speed = SPEED_5000;
+		else
+			state->speed = SPEED_10000;
+		state->duplex = DUPLEX_FULL;
+	}
+
+	return 0;
+}
+
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	switch (state->interface) {
 	case PHY_INTERFACE_MODE_SGMII:
@@ -921,8 +966,27 @@ int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 	}
 }
 
+int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+				    int lane, struct phylink_link_state *state)
+{
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		return mv88e6390_serdes_pcs_get_state_sgmii(chip, port, lane,
+							    state);
+	case PHY_INTERFACE_MODE_5GBASER:
+	case PHY_INTERFACE_MODE_10GBASER:
+		return mv88e6393x_serdes_pcs_get_state_10g(chip, port, lane,
+							   state);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane)
+				    int lane)
 {
 	u16 bmcr;
 	int err;
@@ -938,7 +1002,7 @@ int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex)
+				 int lane, int speed, int duplex)
 {
 	u16 val, bmcr;
 	int err;
@@ -972,7 +1036,7 @@ int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 }
 
 static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
-					    int port, u8 lane)
+					    int port, int lane)
 {
 	u16 bmsr;
 	int err;
@@ -988,8 +1052,25 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 	dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS));
 }
 
+static void mv88e6393x_serdes_irq_link_10g(struct mv88e6xxx_chip *chip,
+					   int port, u8 lane)
+{
+	u16 status;
+	int err;
+
+	/* If the link has dropped, we want to know about it. */
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6390_10G_STAT1, &status);
+	if (err) {
+		dev_err(chip->dev, "can't read Serdes STAT1: %d\n", err);
+		return;
+	}
+
+	dsa_port_phylink_mac_change(chip->ds, port, !!(status & MDIO_STAT1_LSTATUS));
+}
+
 static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
-					     u8 lane, bool enable)
+					     int lane, bool enable)
 {
 	u16 val = 0;
 
@@ -1001,7 +1082,7 @@ static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
 				      MV88E6390_SGMII_INT_ENABLE, val);
 }
 
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -1017,7 +1098,7 @@ int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
-					     u8 lane, u16 *status)
+					     int lane, u16 *status)
 {
 	int err;
 
@@ -1027,8 +1108,85 @@ static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
 	return err;
 }
 
+static int mv88e6393x_serdes_irq_enable_10g(struct mv88e6xxx_chip *chip,
+					    u8 lane, bool enable)
+{
+	u16 val = 0;
+
+	if (enable)
+		val |= MV88E6393X_10G_INT_LINK_CHANGE;
+
+	return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+				      MV88E6393X_10G_INT_ENABLE, val);
+}
+
+int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
+				 int lane, bool enable)
+{
+	u8 cmode = chip->ports[port].cmode;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		return mv88e6390_serdes_irq_enable_sgmii(chip, lane, enable);
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		return mv88e6393x_serdes_irq_enable_10g(chip, lane, enable);
+	}
+
+	return 0;
+}
+
+static int mv88e6393x_serdes_irq_status_10g(struct mv88e6xxx_chip *chip,
+					    u8 lane, u16 *status)
+{
+	int err;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_10G_INT_STATUS, status);
+
+	return err;
+}
+
+irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
+					 int lane)
+{
+	u8 cmode = chip->ports[port].cmode;
+	irqreturn_t ret = IRQ_NONE;
+	u16 status;
+	int err;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		err = mv88e6390_serdes_irq_status_sgmii(chip, lane, &status);
+		if (err)
+			return ret;
+		if (status & (MV88E6390_SGMII_INT_LINK_DOWN |
+			      MV88E6390_SGMII_INT_LINK_UP)) {
+			ret = IRQ_HANDLED;
+			mv88e6390_serdes_irq_link_sgmii(chip, port, lane);
+		}
+		break;
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		err = mv88e6393x_serdes_irq_status_10g(chip, lane, &status);
+		if (err)
+			return err;
+		if (status & MV88E6393X_10G_INT_LINK_CHANGE) {
+			ret = IRQ_HANDLED;
+			mv88e6393x_serdes_irq_link_10g(chip, port, lane);
+		}
+		break;
+	}
+
+	return ret;
+}
+
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	u8 cmode = chip->ports[port].cmode;
 	irqreturn_t ret = IRQ_NONE;
@@ -1087,7 +1245,7 @@ static const u16 mv88e6390_serdes_regs[] = {
 
 int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6xxx_serdes_get_lane(chip, port) == 0)
+	if (mv88e6xxx_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	return ARRAY_SIZE(mv88e6390_serdes_regs) * sizeof(u16);
@@ -1102,7 +1260,7 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 	int i;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane == 0)
+	if (lane < 0)
 		return;
 
 	for (i = 0 ; i < ARRAY_SIZE(mv88e6390_serdes_regs); i++) {
@@ -1112,3 +1270,101 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 			p[i] = reg;
 	}
 }
+
+static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
+{
+	u16 reg, pcs;
+	int err;
+
+	/* mv88e6393x family errata 4.6:
+	 * Cannot clear PwrDn bit on SERDES on port 0 if device is configured
+	 * CPU_MGD mode or P0_mode is configured for [x]MII.
+	 * Workaround: Set Port0 SERDES register 4.F002 bit 5=0 and bit 15=1.
+	 *
+	 * It seems that after this workaround the SERDES is automatically
+	 * powered up (the bit is cleared), so power it down.
+	 */
+	if (lane == MV88E6393X_PORT0_LANE) {
+		err = mv88e6390_serdes_read(chip, MV88E6393X_PORT0_LANE,
+					    MDIO_MMD_PHYXS,
+					    MV88E6393X_SERDES_POC, &reg);
+		if (err)
+			return err;
+
+		reg &= ~MV88E6393X_SERDES_POC_PDOWN;
+		reg |= MV88E6393X_SERDES_POC_RESET;
+
+		err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+					     MV88E6393X_SERDES_POC, reg);
+		if (err)
+			return err;
+
+		err = mv88e6390_serdes_power_sgmii(chip, lane, false);
+		if (err)
+			return err;
+	}
+
+	/* mv88e6393x family errata 4.8:
+	 * When a SERDES port is operating in 1000BASE-X or SGMII mode link may
+	 * not come up after hardware reset or software reset of SERDES core.
+	 * Workaround is to write SERDES register 4.F074.14=1 for only those
+	 * modes and 0 in all other modes.
+	 */
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_SERDES_POC, &pcs);
+	if (err)
+		return err;
+
+	pcs &= MV88E6393X_SERDES_POC_PCS_MASK;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_ERRATA_4_8_REG, &reg);
+	if (err)
+		return err;
+
+	if (pcs == MV88E6393X_SERDES_POC_PCS_1000BASEX ||
+	    pcs == MV88E6393X_SERDES_POC_PCS_SGMII_PHY ||
+	    pcs == MV88E6393X_SERDES_POC_PCS_SGMII_MAC)
+		reg |= MV88E6393X_ERRATA_4_8_BIT;
+	else
+		reg &= ~MV88E6393X_ERRATA_4_8_BIT;
+
+	return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+				      MV88E6393X_ERRATA_4_8_REG, reg);
+}
+
+int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
+{
+	int err;
+
+	err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE);
+	if (err)
+		return err;
+
+	err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE);
+	if (err)
+		return err;
+
+	return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE);
+}
+
+int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+			    bool on)
+{
+	u8 cmode = chip->ports[port].cmode;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		return mv88e6390_serdes_power_sgmii(chip, lane, on);
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		return mv88e6390_serdes_power_10g(chip, lane, on);
+	}
+
+	return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 93822ef9bab8..cbb3ba30caea 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -42,6 +42,9 @@
 /* 10GBASE-R and 10GBASE-X4/X2 */
 #define MV88E6390_10G_CTRL1		(0x1000 + MDIO_CTRL1)
 #define MV88E6390_10G_STAT1		(0x1000 + MDIO_STAT1)
+#define MV88E6393X_10G_INT_ENABLE	0x9000
+#define MV88E6393X_10G_INT_LINK_CHANGE	BIT(2)
+#define MV88E6393X_10G_INT_STATUS	0x9001
 
 /* 1000BASE-X and SGMII */
 #define MV88E6390_SGMII_BMCR		(0x2000 + MII_BMCR)
@@ -73,55 +76,86 @@
 #define MV88E6390_PG_CONTROL		0xf010
 #define MV88E6390_PG_CONTROL_ENABLE_PC		BIT(0)
 
-u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+#define MV88E6393X_PORT0_LANE			0x00
+#define MV88E6393X_PORT9_LANE			0x09
+#define MV88E6393X_PORT10_LANE			0x0a
+
+/* Port Operational Configuration */
+#define MV88E6393X_SERDES_POC			0xf002
+#define MV88E6393X_SERDES_POC_PCS_1000BASEX	0x0000
+#define MV88E6393X_SERDES_POC_PCS_2500BASEX	0x0001
+#define MV88E6393X_SERDES_POC_PCS_SGMII_PHY	0x0002
+#define MV88E6393X_SERDES_POC_PCS_SGMII_MAC	0x0003
+#define MV88E6393X_SERDES_POC_PCS_5GBASER	0x0004
+#define MV88E6393X_SERDES_POC_PCS_10GBASER	0x0005
+#define MV88E6393X_SERDES_POC_PCS_USXGMII_PHY	0x0006
+#define MV88E6393X_SERDES_POC_PCS_USXGMII_MAC	0x0007
+#define MV88E6393X_SERDES_POC_PCS_MASK		0x0007
+#define MV88E6393X_SERDES_POC_RESET		BIT(15)
+#define MV88E6393X_SERDES_POC_PDOWN		BIT(5)
+
+#define MV88E6393X_ERRATA_4_8_REG		0xF074
+#define MV88E6393X_ERRATA_4_8_BIT		BIT(14)
+
+int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise);
 int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise);
 int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
 int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
+int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+				    int lane, struct phylink_link_state *state);
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane);
+				    int lane);
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane);
+				    int lane);
 int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex);
+				 int lane, int speed, int duplex);
 int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex);
+				 int lane, int speed, int duplex);
 unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
 					  int port);
 unsigned int mv88e6390_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
 					  int port);
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up);
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+			    bool on);
+int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip);
+int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
+int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
+				 int lane, bool enable);
 irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
 irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
+irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
+					 int lane);
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
 				 int port, uint8_t *data);
@@ -138,18 +172,18 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port);
 void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 
-/* Return the (first) SERDES lane address a port is using, 0 otherwise. */
-static inline u8 mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
-					   int port)
+/* Return the (first) SERDES lane address a port is using, -errno otherwise. */
+static inline int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
+					    int port)
 {
 	if (!chip->info->ops->serdes_get_lane)
-		return 0;
+		return -EOPNOTSUPP;
 
 	return chip->info->ops->serdes_get_lane(chip, port);
 }
 
 static inline int mv88e6xxx_serdes_power_up(struct mv88e6xxx_chip *chip,
-					    int port, u8 lane)
+					    int port, int lane)
 {
 	if (!chip->info->ops->serdes_power)
 		return -EOPNOTSUPP;
@@ -158,7 +192,7 @@ static inline int mv88e6xxx_serdes_power_up(struct mv88e6xxx_chip *chip,
 }
 
 static inline int mv88e6xxx_serdes_power_down(struct mv88e6xxx_chip *chip,
-					      int port, u8 lane)
+					      int port, int lane)
 {
 	if (!chip->info->ops->serdes_power)
 		return -EOPNOTSUPP;
@@ -176,7 +210,7 @@ mv88e6xxx_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
 }
 
 static inline int mv88e6xxx_serdes_irq_enable(struct mv88e6xxx_chip *chip,
-					      int port, u8 lane)
+					      int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_enable)
 		return -EOPNOTSUPP;
@@ -185,7 +219,7 @@ static inline int mv88e6xxx_serdes_irq_enable(struct mv88e6xxx_chip *chip,
 }
 
 static inline int mv88e6xxx_serdes_irq_disable(struct mv88e6xxx_chip *chip,
-					       int port, u8 lane)
+					       int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_enable)
 		return -EOPNOTSUPP;
@@ -194,7 +228,7 @@ static inline int mv88e6xxx_serdes_irq_disable(struct mv88e6xxx_chip *chip,
 }
 
 static inline irqreturn_t
-mv88e6xxx_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, u8 lane)
+mv88e6xxx_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_status)
 		return IRQ_NONE;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 628afb47b579..6b5442be0230 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -719,7 +719,9 @@ static int felix_bridge_join(struct dsa_switch *ds, int port,
 {
 	struct ocelot *ocelot = ds->priv;
 
-	return ocelot_port_bridge_join(ocelot, port, br);
+	ocelot_port_bridge_join(ocelot, port, br);
+
+	return 0;
 }
 
 static void felix_bridge_leave(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 12e76020bea3..973761132fc3 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -317,11 +317,16 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	if (rc)
 		return rc;
 
-	rc = -EOPNOTSUPP;
-
 	flow_action_for_each(i, act, &rule->action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				rc = -EOPNOTSUPP;
+				goto out;
+			}
+
 			rc = sja1105_flower_policer(priv, port, extack, cookie,
 						    &key,
 						    act->police.rate_bytes_ps,
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 51ea104c63bb..d9c198ca0197 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3049,21 +3049,6 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	}
 }
 
-static int sja1105_port_enable(struct dsa_switch *ds, int port,
-			       struct phy_device *phy)
-{
-	struct net_device *slave;
-
-	if (!dsa_is_user_port(ds, port))
-		return 0;
-
-	slave = dsa_to_port(ds, port)->slave;
-
-	slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
-
-	return 0;
-}
-
 static void sja1105_port_disable(struct dsa_switch *ds, int port)
 {
 	struct sja1105_private *priv = ds->priv;
@@ -3491,7 +3476,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.get_ethtool_stats	= sja1105_get_ethtool_stats,
 	.get_sset_count		= sja1105_get_sset_count,
 	.get_ts_info		= sja1105_get_ts_info,
-	.port_enable		= sja1105_port_enable,
 	.port_disable		= sja1105_port_disable,
 	.port_fdb_dump		= sja1105_fdb_dump,
 	.port_fdb_add		= sja1105_fdb_add,
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 53e1f7e07959..96cc5fc36eb5 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1051,6 +1051,7 @@ el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd)
 		break;
 	case 3:
 		cmd->base.port = PORT_BNC;
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index ad04660b97b8..4b85f2b74872 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -19,6 +19,7 @@ config SUNGEM_PHY
 	tristate
 
 source "drivers/net/ethernet/3com/Kconfig"
+source "drivers/net/ethernet/actions/Kconfig"
 source "drivers/net/ethernet/adaptec/Kconfig"
 source "drivers/net/ethernet/aeroflex/Kconfig"
 source "drivers/net/ethernet/agere/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 1e7dc8a7762d..9394493e8187 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -5,6 +5,7 @@
 
 obj-$(CONFIG_NET_VENDOR_3COM) += 3com/
 obj-$(CONFIG_NET_VENDOR_8390) += 8390/
+obj-$(CONFIG_NET_VENDOR_ACTIONS) += actions/
 obj-$(CONFIG_NET_VENDOR_ADAPTEC) += adaptec/
 obj-$(CONFIG_GRETH) += aeroflex/
 obj-$(CONFIG_NET_VENDOR_AGERE) += agere/
diff --git a/drivers/net/ethernet/actions/Kconfig b/drivers/net/ethernet/actions/Kconfig
new file mode 100644
index 000000000000..ccad6a3f4d6f
--- /dev/null
+++ b/drivers/net/ethernet/actions/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config NET_VENDOR_ACTIONS
+	bool "Actions Semi devices"
+	default y
+	depends on ARCH_ACTIONS
+	help
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about Actions Semi devices.  If you say Y, you will be
+	  asked for your specific card in the following questions.
+
+if NET_VENDOR_ACTIONS
+
+config OWL_EMAC
+	tristate "Actions Semi Owl Ethernet MAC support"
+	select PHYLIB
+	help
+	  This driver supports the Actions Semi Ethernet Media Access
+	  Controller (EMAC) found on the S500 and S900 SoCs.  The controller
+	  is compliant with the IEEE 802.3 CSMA/CD standard and supports
+	  both half-duplex and full-duplex operation modes at 10/100 Mb/s.
+
+endif # NET_VENDOR_ACTIONS
diff --git a/drivers/net/ethernet/actions/Makefile b/drivers/net/ethernet/actions/Makefile
new file mode 100644
index 000000000000..fde8001d538a
--- /dev/null
+++ b/drivers/net/ethernet/actions/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Actions Semi Owl SoCs built-in ethernet macs
+#
+
+obj-$(CONFIG_OWL_EMAC) += owl-emac.o
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
new file mode 100644
index 000000000000..b8e771c2bc40
--- /dev/null
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -0,0 +1,1625 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Actions Semi Owl SoCs Ethernet MAC driver
+ *
+ * Copyright (c) 2012 Actions Semi Inc.
+ * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/reset.h>
+
+#include "owl-emac.h"
+
+#define OWL_EMAC_DEFAULT_MSG_ENABLE	(NETIF_MSG_DRV | \
+					 NETIF_MSG_PROBE | \
+					 NETIF_MSG_LINK)
+
+static u32 owl_emac_reg_read(struct owl_emac_priv *priv, u32 reg)
+{
+	return readl(priv->base + reg);
+}
+
+static void owl_emac_reg_write(struct owl_emac_priv *priv, u32 reg, u32 data)
+{
+	writel(data, priv->base + reg);
+}
+
+static u32 owl_emac_reg_update(struct owl_emac_priv *priv,
+			       u32 reg, u32 mask, u32 val)
+{
+	u32 data, old_val;
+
+	data = owl_emac_reg_read(priv, reg);
+	old_val = data & mask;
+
+	data &= ~mask;
+	data |= val & mask;
+
+	owl_emac_reg_write(priv, reg, data);
+
+	return old_val;
+}
+
+static void owl_emac_reg_set(struct owl_emac_priv *priv, u32 reg, u32 bits)
+{
+	owl_emac_reg_update(priv, reg, bits, bits);
+}
+
+static void owl_emac_reg_clear(struct owl_emac_priv *priv, u32 reg, u32 bits)
+{
+	owl_emac_reg_update(priv, reg, bits, 0);
+}
+
+static struct device *owl_emac_get_dev(struct owl_emac_priv *priv)
+{
+	return priv->netdev->dev.parent;
+}
+
+static void owl_emac_irq_enable(struct owl_emac_priv *priv)
+{
+	/* Enable all interrupts except TU.
+	 *
+	 * Note the NIE and AIE bits shall also be set in order to actually
+	 * enable the selected interrupts.
+	 */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7,
+			   OWL_EMAC_BIT_MAC_CSR7_NIE |
+			   OWL_EMAC_BIT_MAC_CSR7_AIE |
+			   OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE);
+}
+
+static void owl_emac_irq_disable(struct owl_emac_priv *priv)
+{
+	/* Disable all interrupts.
+	 *
+	 * WARNING: Unset only the NIE and AIE bits in CSR7 to workaround an
+	 * unexpected side effect (MAC hardware bug?!) where some bits in the
+	 * status register (CSR5) are cleared automatically before being able
+	 * to read them via owl_emac_irq_clear().
+	 */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7,
+			   OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE);
+}
+
+static u32 owl_emac_irq_status(struct owl_emac_priv *priv)
+{
+	return owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR5);
+}
+
+static u32 owl_emac_irq_clear(struct owl_emac_priv *priv)
+{
+	u32 val = owl_emac_irq_status(priv);
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR5, val);
+
+	return val;
+}
+
+static dma_addr_t owl_emac_dma_map_rx(struct owl_emac_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	/* Buffer pointer for the RX DMA descriptor must be word aligned. */
+	return dma_map_single(dev, skb_tail_pointer(skb),
+			      skb_tailroom(skb), DMA_FROM_DEVICE);
+}
+
+static void owl_emac_dma_unmap_rx(struct owl_emac_priv *priv,
+				  struct sk_buff *skb, dma_addr_t dma_addr)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	dma_unmap_single(dev, dma_addr, skb_tailroom(skb), DMA_FROM_DEVICE);
+}
+
+static dma_addr_t owl_emac_dma_map_tx(struct owl_emac_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	return dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
+}
+
+static void owl_emac_dma_unmap_tx(struct owl_emac_priv *priv,
+				  struct sk_buff *skb, dma_addr_t dma_addr)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	dma_unmap_single(dev, dma_addr, skb_headlen(skb), DMA_TO_DEVICE);
+}
+
+static unsigned int owl_emac_ring_num_unused(struct owl_emac_ring *ring)
+{
+	return CIRC_SPACE(ring->head, ring->tail, ring->size);
+}
+
+static unsigned int owl_emac_ring_get_next(struct owl_emac_ring *ring,
+					   unsigned int cur)
+{
+	return (cur + 1) & (ring->size - 1);
+}
+
+static void owl_emac_ring_push_head(struct owl_emac_ring *ring)
+{
+	ring->head = owl_emac_ring_get_next(ring, ring->head);
+}
+
+static void owl_emac_ring_pop_tail(struct owl_emac_ring *ring)
+{
+	ring->tail = owl_emac_ring_get_next(ring, ring->tail);
+}
+
+static struct sk_buff *owl_emac_alloc_skb(struct net_device *netdev)
+{
+	struct sk_buff *skb;
+	int offset;
+
+	skb = netdev_alloc_skb(netdev, OWL_EMAC_RX_FRAME_MAX_LEN +
+			       OWL_EMAC_SKB_RESERVE);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Ensure 4 bytes DMA alignment. */
+	offset = ((uintptr_t)skb->data) & (OWL_EMAC_SKB_ALIGN - 1);
+	if (unlikely(offset))
+		skb_reserve(skb, OWL_EMAC_SKB_ALIGN - offset);
+
+	return skb;
+}
+
+static int owl_emac_ring_prepare_rx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	struct device *dev = owl_emac_get_dev(priv);
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		skb = owl_emac_alloc_skb(netdev);
+		if (!skb)
+			return -ENOMEM;
+
+		dma_addr = owl_emac_dma_map_rx(priv, skb);
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_kfree_skb(skb);
+			return -ENOMEM;
+		}
+
+		desc = &ring->descs[i];
+		desc->status = OWL_EMAC_BIT_RDES0_OWN;
+		desc->control = skb_tailroom(skb) & OWL_EMAC_MSK_RDES1_RBS1;
+		desc->buf_addr = dma_addr;
+		desc->reserved = 0;
+
+		ring->skbs[i] = skb;
+		ring->skbs_dma[i] = dma_addr;
+	}
+
+	desc->control |= OWL_EMAC_BIT_RDES1_RER;
+
+	ring->head = 0;
+	ring->tail = 0;
+
+	return 0;
+}
+
+static void owl_emac_ring_prepare_tx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct owl_emac_ring_desc *desc;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		desc = &ring->descs[i];
+
+		desc->status = 0;
+		desc->control = OWL_EMAC_BIT_TDES1_IC;
+		desc->buf_addr = 0;
+		desc->reserved = 0;
+	}
+
+	desc->control |= OWL_EMAC_BIT_TDES1_TER;
+
+	memset(ring->skbs_dma, 0, sizeof(dma_addr_t) * ring->size);
+
+	ring->head = 0;
+	ring->tail = 0;
+}
+
+static void owl_emac_ring_unprepare_rx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		ring->descs[i].status = 0;
+
+		if (!ring->skbs_dma[i])
+			continue;
+
+		owl_emac_dma_unmap_rx(priv, ring->skbs[i], ring->skbs_dma[i]);
+		ring->skbs_dma[i] = 0;
+
+		dev_kfree_skb(ring->skbs[i]);
+		ring->skbs[i] = NULL;
+	}
+}
+
+static void owl_emac_ring_unprepare_tx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		ring->descs[i].status = 0;
+
+		if (!ring->skbs_dma[i])
+			continue;
+
+		owl_emac_dma_unmap_tx(priv, ring->skbs[i], ring->skbs_dma[i]);
+		ring->skbs_dma[i] = 0;
+
+		dev_kfree_skb(ring->skbs[i]);
+		ring->skbs[i] = NULL;
+	}
+}
+
+static int owl_emac_ring_alloc(struct device *dev, struct owl_emac_ring *ring,
+			       unsigned int size)
+{
+	ring->descs = dmam_alloc_coherent(dev,
+					  sizeof(struct owl_emac_ring_desc) * size,
+					  &ring->descs_dma, GFP_KERNEL);
+	if (!ring->descs)
+		return -ENOMEM;
+
+	ring->skbs = devm_kcalloc(dev, size, sizeof(struct sk_buff *),
+				  GFP_KERNEL);
+	if (!ring->skbs)
+		return -ENOMEM;
+
+	ring->skbs_dma = devm_kcalloc(dev, size, sizeof(dma_addr_t),
+				      GFP_KERNEL);
+	if (!ring->skbs_dma)
+		return -ENOMEM;
+
+	ring->size = size;
+
+	return 0;
+}
+
+static void owl_emac_dma_cmd_resume_rx(struct owl_emac_priv *priv)
+{
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR2,
+			   OWL_EMAC_VAL_MAC_CSR2_RPD);
+}
+
+static void owl_emac_dma_cmd_resume_tx(struct owl_emac_priv *priv)
+{
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR1,
+			   OWL_EMAC_VAL_MAC_CSR1_TPD);
+}
+
+static u32 owl_emac_dma_cmd_set_tx(struct owl_emac_priv *priv, u32 status)
+{
+	return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+				   OWL_EMAC_BIT_MAC_CSR6_ST, status);
+}
+
+static u32 owl_emac_dma_cmd_start_tx(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set_tx(priv, ~0);
+}
+
+static u32 owl_emac_dma_cmd_set(struct owl_emac_priv *priv, u32 status)
+{
+	return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+				   OWL_EMAC_MSK_MAC_CSR6_STSR, status);
+}
+
+static u32 owl_emac_dma_cmd_start(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set(priv, ~0);
+}
+
+static u32 owl_emac_dma_cmd_stop(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set(priv, 0);
+}
+
+static void owl_emac_set_hw_mac_addr(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	u8 *mac_addr = netdev->dev_addr;
+	u32 addr_high, addr_low;
+
+	addr_high = mac_addr[0] << 8 | mac_addr[1];
+	addr_low = mac_addr[2] << 24 | mac_addr[3] << 16 |
+		   mac_addr[4] << 8 | mac_addr[5];
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR17, addr_high);
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR16, addr_low);
+}
+
+static void owl_emac_update_link_state(struct owl_emac_priv *priv)
+{
+	u32 val, status;
+
+	if (priv->pause) {
+		val = OWL_EMAC_BIT_MAC_CSR20_FCE | OWL_EMAC_BIT_MAC_CSR20_TUE;
+		val |= OWL_EMAC_BIT_MAC_CSR20_TPE | OWL_EMAC_BIT_MAC_CSR20_RPE;
+		val |= OWL_EMAC_BIT_MAC_CSR20_BPE;
+	} else {
+		val = 0;
+	}
+
+	/* Update flow control. */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR20, val);
+
+	val = (priv->speed == SPEED_100) ? OWL_EMAC_VAL_MAC_CSR6_SPEED_100M :
+					   OWL_EMAC_VAL_MAC_CSR6_SPEED_10M;
+	val <<= OWL_EMAC_OFF_MAC_CSR6_SPEED;
+
+	if (priv->duplex == DUPLEX_FULL)
+		val |= OWL_EMAC_BIT_MAC_CSR6_FD;
+
+	spin_lock_bh(&priv->lock);
+
+	/* Temporarily stop DMA TX & RX. */
+	status = owl_emac_dma_cmd_stop(priv);
+
+	/* Update operation modes. */
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_MSK_MAC_CSR6_SPEED |
+			    OWL_EMAC_BIT_MAC_CSR6_FD, val);
+
+	/* Restore DMA TX & RX status. */
+	owl_emac_dma_cmd_set(priv, status);
+
+	spin_unlock_bh(&priv->lock);
+}
+
+static void owl_emac_adjust_link(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
+	bool state_changed = false;
+
+	if (phydev->link) {
+		if (!priv->link) {
+			priv->link = phydev->link;
+			state_changed = true;
+		}
+
+		if (priv->speed != phydev->speed) {
+			priv->speed = phydev->speed;
+			state_changed = true;
+		}
+
+		if (priv->duplex != phydev->duplex) {
+			priv->duplex = phydev->duplex;
+			state_changed = true;
+		}
+
+		if (priv->pause != phydev->pause) {
+			priv->pause = phydev->pause;
+			state_changed = true;
+		}
+	} else {
+		if (priv->link) {
+			priv->link = phydev->link;
+			state_changed = true;
+		}
+	}
+
+	if (state_changed) {
+		if (phydev->link)
+			owl_emac_update_link_state(priv);
+
+		if (netif_msg_link(priv))
+			phy_print_status(phydev);
+	}
+}
+
+static irqreturn_t owl_emac_handle_irq(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	if (netif_running(netdev)) {
+		owl_emac_irq_disable(priv);
+		napi_schedule(&priv->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void owl_emac_ether_addr_push(u8 **dst, const u8 *src)
+{
+	u32 *a = (u32 *)(*dst);
+	const u16 *b = (const u16 *)src;
+
+	a[0] = b[0];
+	a[1] = b[1];
+	a[2] = b[2];
+
+	*dst += 12;
+}
+
+static void
+owl_emac_setup_frame_prepare(struct owl_emac_priv *priv, struct sk_buff *skb)
+{
+	const u8 bcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	const u8 *mac_addr = priv->netdev->dev_addr;
+	u8 *frame;
+	int i;
+
+	skb_put(skb, OWL_EMAC_SETUP_FRAME_LEN);
+
+	frame = skb->data;
+	memset(frame, 0, skb->len);
+
+	owl_emac_ether_addr_push(&frame, mac_addr);
+	owl_emac_ether_addr_push(&frame, bcast_addr);
+
+	/* Fill multicast addresses. */
+	WARN_ON(priv->mcaddr_list.count >= OWL_EMAC_MAX_MULTICAST_ADDRS);
+	for (i = 0; i < priv->mcaddr_list.count; i++) {
+		mac_addr = priv->mcaddr_list.addrs[i];
+		owl_emac_ether_addr_push(&frame, mac_addr);
+	}
+}
+
+/* The setup frame is a special descriptor which is used to provide physical
+ * addresses (i.e. mac, broadcast and multicast) to the MAC hardware for
+ * filtering purposes. To be recognized as a setup frame, the TDES1_SET bit
+ * must be set in the TX descriptor control field.
+ */
+static int owl_emac_setup_frame_xmit(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	unsigned int tx_head;
+	u32 status, control;
+	dma_addr_t dma_addr;
+	int ret;
+
+	skb = owl_emac_alloc_skb(netdev);
+	if (!skb)
+		return -ENOMEM;
+
+	owl_emac_setup_frame_prepare(priv, skb);
+
+	dma_addr = owl_emac_dma_map_tx(priv, skb);
+	if (dma_mapping_error(owl_emac_get_dev(priv), dma_addr)) {
+		ret = -ENOMEM;
+		goto err_free_skb;
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	tx_head = ring->head;
+	desc = &ring->descs[tx_head];
+
+	status = READ_ONCE(desc->status);
+	control = READ_ONCE(desc->control);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (unlikely(status & OWL_EMAC_BIT_TDES0_OWN) ||
+	    !owl_emac_ring_num_unused(ring)) {
+		spin_unlock_bh(&priv->lock);
+		owl_emac_dma_unmap_tx(priv, skb, dma_addr);
+		ret = -EBUSY;
+		goto err_free_skb;
+	}
+
+	ring->skbs[tx_head] = skb;
+	ring->skbs_dma[tx_head] = dma_addr;
+
+	control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */
+	control |= OWL_EMAC_BIT_TDES1_SET;
+	control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len;
+
+	WRITE_ONCE(desc->control, control);
+	WRITE_ONCE(desc->buf_addr, dma_addr);
+	dma_wmb(); /* Flush descriptor before changing ownership. */
+	WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN);
+
+	owl_emac_ring_push_head(ring);
+
+	/* Temporarily enable DMA TX. */
+	status = owl_emac_dma_cmd_start_tx(priv);
+
+	/* Trigger setup frame processing. */
+	owl_emac_dma_cmd_resume_tx(priv);
+
+	/* Restore DMA TX status. */
+	owl_emac_dma_cmd_set_tx(priv, status);
+
+	/* Stop regular TX until setup frame is processed. */
+	netif_stop_queue(netdev);
+
+	spin_unlock_bh(&priv->lock);
+
+	return 0;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+	return ret;
+}
+
+static netdev_tx_t owl_emac_ndo_start_xmit(struct sk_buff *skb,
+					   struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct owl_emac_ring_desc *desc;
+	unsigned int tx_head;
+	u32 status, control;
+	dma_addr_t dma_addr;
+
+	dma_addr = owl_emac_dma_map_tx(priv, skb);
+	if (dma_mapping_error(dev, dma_addr)) {
+		dev_err_ratelimited(&netdev->dev, "TX DMA mapping failed\n");
+		dev_kfree_skb(skb);
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	tx_head = ring->head;
+	desc = &ring->descs[tx_head];
+
+	status = READ_ONCE(desc->status);
+	control = READ_ONCE(desc->control);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (!owl_emac_ring_num_unused(ring) ||
+	    unlikely(status & OWL_EMAC_BIT_TDES0_OWN)) {
+		netif_stop_queue(netdev);
+		spin_unlock_bh(&priv->lock);
+
+		dev_dbg_ratelimited(&netdev->dev, "TX buffer full, status=0x%08x\n",
+				    owl_emac_irq_status(priv));
+		owl_emac_dma_unmap_tx(priv, skb, dma_addr);
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_BUSY;
+	}
+
+	ring->skbs[tx_head] = skb;
+	ring->skbs_dma[tx_head] = dma_addr;
+
+	control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */
+	control |= OWL_EMAC_BIT_TDES1_FS | OWL_EMAC_BIT_TDES1_LS;
+	control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len;
+
+	WRITE_ONCE(desc->control, control);
+	WRITE_ONCE(desc->buf_addr, dma_addr);
+	dma_wmb(); /* Flush descriptor before changing ownership. */
+	WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN);
+
+	owl_emac_dma_cmd_resume_tx(priv);
+	owl_emac_ring_push_head(ring);
+
+	/* FIXME: The transmission is currently restricted to a single frame
+	 * at a time as a workaround for a MAC hardware bug that causes random
+	 * freeze of the TX queue processor.
+	 */
+	netif_stop_queue(netdev);
+
+	spin_unlock_bh(&priv->lock);
+
+	return NETDEV_TX_OK;
+}
+
+static bool owl_emac_tx_complete_tail(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	unsigned int tx_tail;
+	u32 status;
+
+	tx_tail = ring->tail;
+	desc = &ring->descs[tx_tail];
+
+	status = READ_ONCE(desc->status);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (status & OWL_EMAC_BIT_TDES0_OWN)
+		return false;
+
+	/* Check for errors. */
+	if (status & OWL_EMAC_BIT_TDES0_ES) {
+		dev_dbg_ratelimited(&netdev->dev,
+				    "TX complete error status: 0x%08x\n",
+				    status);
+
+		netdev->stats.tx_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_UF)
+			netdev->stats.tx_fifo_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_EC)
+			netdev->stats.tx_aborted_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_LC)
+			netdev->stats.tx_window_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_NC)
+			netdev->stats.tx_heartbeat_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_LO)
+			netdev->stats.tx_carrier_errors++;
+	} else {
+		netdev->stats.tx_packets++;
+		netdev->stats.tx_bytes += ring->skbs[tx_tail]->len;
+	}
+
+	/* Some collisions occurred, but pkt has been transmitted. */
+	if (status & OWL_EMAC_BIT_TDES0_DE)
+		netdev->stats.collisions++;
+
+	skb = ring->skbs[tx_tail];
+	owl_emac_dma_unmap_tx(priv, skb, ring->skbs_dma[tx_tail]);
+	dev_kfree_skb(skb);
+
+	ring->skbs[tx_tail] = NULL;
+	ring->skbs_dma[tx_tail] = 0;
+
+	owl_emac_ring_pop_tail(ring);
+
+	if (unlikely(netif_queue_stopped(netdev)))
+		netif_wake_queue(netdev);
+
+	return true;
+}
+
+static void owl_emac_tx_complete(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	unsigned int tx_next;
+	u32 status;
+
+	spin_lock(&priv->lock);
+
+	while (ring->tail != ring->head) {
+		if (!owl_emac_tx_complete_tail(priv))
+			break;
+	}
+
+	/* FIXME: This is a workaround for a MAC hardware bug not clearing
+	 * (sometimes) the OWN bit for a transmitted frame descriptor.
+	 *
+	 * At this point, when TX queue is full, the tail descriptor has the
+	 * OWN bit set, which normally means the frame has not been processed
+	 * or transmitted yet. But if there is at least one descriptor in the
+	 * queue having the OWN bit cleared, we can safely assume the tail
+	 * frame has been also processed by the MAC hardware.
+	 *
+	 * If that's the case, let's force the frame completion by manually
+	 * clearing the OWN bit.
+	 */
+	if (unlikely(!owl_emac_ring_num_unused(ring))) {
+		tx_next = ring->tail;
+
+		while ((tx_next = owl_emac_ring_get_next(ring, tx_next)) != ring->head) {
+			status = READ_ONCE(ring->descs[tx_next].status);
+			dma_rmb(); /* Ensure data has been read before used. */
+
+			if (status & OWL_EMAC_BIT_TDES0_OWN)
+				continue;
+
+			netdev_dbg(netdev, "Found uncleared TX desc OWN bit\n");
+
+			status = READ_ONCE(ring->descs[ring->tail].status);
+			dma_rmb(); /* Ensure data has been read before used. */
+			status &= ~OWL_EMAC_BIT_TDES0_OWN;
+			WRITE_ONCE(ring->descs[ring->tail].status, status);
+
+			owl_emac_tx_complete_tail(priv);
+			break;
+		}
+	}
+
+	spin_unlock(&priv->lock);
+}
+
+static int owl_emac_rx_process(struct owl_emac_priv *priv, int budget)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	struct device *dev = owl_emac_get_dev(priv);
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *curr_skb, *new_skb;
+	dma_addr_t curr_dma, new_dma;
+	unsigned int rx_tail, len;
+	u32 status;
+	int recv = 0;
+
+	while (recv < budget) {
+		spin_lock(&priv->lock);
+
+		rx_tail = ring->tail;
+		desc = &ring->descs[rx_tail];
+
+		status = READ_ONCE(desc->status);
+		dma_rmb(); /* Ensure data has been read before used. */
+
+		if (status & OWL_EMAC_BIT_RDES0_OWN) {
+			spin_unlock(&priv->lock);
+			break;
+		}
+
+		curr_skb = ring->skbs[rx_tail];
+		curr_dma = ring->skbs_dma[rx_tail];
+		owl_emac_ring_pop_tail(ring);
+
+		spin_unlock(&priv->lock);
+
+		if (status & (OWL_EMAC_BIT_RDES0_DE | OWL_EMAC_BIT_RDES0_RF |
+		    OWL_EMAC_BIT_RDES0_TL | OWL_EMAC_BIT_RDES0_CS |
+		    OWL_EMAC_BIT_RDES0_DB | OWL_EMAC_BIT_RDES0_CE |
+		    OWL_EMAC_BIT_RDES0_ZERO)) {
+			dev_dbg_ratelimited(&netdev->dev,
+					    "RX desc error status: 0x%08x\n",
+					    status);
+
+			if (status & OWL_EMAC_BIT_RDES0_DE)
+				netdev->stats.rx_over_errors++;
+
+			if (status & (OWL_EMAC_BIT_RDES0_RF | OWL_EMAC_BIT_RDES0_DB))
+				netdev->stats.rx_frame_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_TL)
+				netdev->stats.rx_length_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_CS)
+				netdev->stats.collisions++;
+
+			if (status & OWL_EMAC_BIT_RDES0_CE)
+				netdev->stats.rx_crc_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_ZERO)
+				netdev->stats.rx_fifo_errors++;
+
+			goto drop_skb;
+		}
+
+		len = (status & OWL_EMAC_MSK_RDES0_FL) >> OWL_EMAC_OFF_RDES0_FL;
+		if (unlikely(len > OWL_EMAC_RX_FRAME_MAX_LEN)) {
+			netdev->stats.rx_length_errors++;
+			netdev_err(netdev, "invalid RX frame len: %u\n", len);
+			goto drop_skb;
+		}
+
+		/* Prepare new skb before receiving the current one. */
+		new_skb = owl_emac_alloc_skb(netdev);
+		if (unlikely(!new_skb))
+			goto drop_skb;
+
+		new_dma = owl_emac_dma_map_rx(priv, new_skb);
+		if (dma_mapping_error(dev, new_dma)) {
+			dev_kfree_skb(new_skb);
+			netdev_err(netdev, "RX DMA mapping failed\n");
+			goto drop_skb;
+		}
+
+		owl_emac_dma_unmap_rx(priv, curr_skb, curr_dma);
+
+		skb_put(curr_skb, len - ETH_FCS_LEN);
+		curr_skb->ip_summed = CHECKSUM_NONE;
+		curr_skb->protocol = eth_type_trans(curr_skb, netdev);
+		curr_skb->dev = netdev;
+
+		netif_receive_skb(curr_skb);
+
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += len;
+		recv++;
+		goto push_skb;
+
+drop_skb:
+		netdev->stats.rx_dropped++;
+		netdev->stats.rx_errors++;
+		/* Reuse the current skb. */
+		new_skb = curr_skb;
+		new_dma = curr_dma;
+
+push_skb:
+		spin_lock(&priv->lock);
+
+		ring->skbs[ring->head] = new_skb;
+		ring->skbs_dma[ring->head] = new_dma;
+
+		WRITE_ONCE(desc->buf_addr, new_dma);
+		dma_wmb(); /* Flush descriptor before changing ownership. */
+		WRITE_ONCE(desc->status, OWL_EMAC_BIT_RDES0_OWN);
+
+		owl_emac_ring_push_head(ring);
+
+		spin_unlock(&priv->lock);
+	}
+
+	return recv;
+}
+
+static int owl_emac_poll(struct napi_struct *napi, int budget)
+{
+	int work_done = 0, ru_cnt = 0, recv;
+	static int tx_err_cnt, rx_err_cnt;
+	struct owl_emac_priv *priv;
+	u32 status, proc_status;
+
+	priv = container_of(napi, struct owl_emac_priv, napi);
+
+	while ((status = owl_emac_irq_clear(priv)) &
+	       (OWL_EMAC_BIT_MAC_CSR5_NIS | OWL_EMAC_BIT_MAC_CSR5_AIS)) {
+		recv = 0;
+
+		/* TX setup frame raises ETI instead of TI. */
+		if (status & (OWL_EMAC_BIT_MAC_CSR5_TI | OWL_EMAC_BIT_MAC_CSR5_ETI)) {
+			owl_emac_tx_complete(priv);
+			tx_err_cnt = 0;
+
+			/* Count MAC internal RX errors. */
+			proc_status = status & OWL_EMAC_MSK_MAC_CSR5_RS;
+			proc_status >>= OWL_EMAC_OFF_MAC_CSR5_RS;
+			if (proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_DATA ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_CDES ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_FDES)
+				rx_err_cnt++;
+		}
+
+		if (status & OWL_EMAC_BIT_MAC_CSR5_RI) {
+			recv = owl_emac_rx_process(priv, budget - work_done);
+			rx_err_cnt = 0;
+
+			/* Count MAC internal TX errors. */
+			proc_status = status & OWL_EMAC_MSK_MAC_CSR5_TS;
+			proc_status >>= OWL_EMAC_OFF_MAC_CSR5_TS;
+			if (proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_DATA ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_CDES)
+				tx_err_cnt++;
+		} else if (status & OWL_EMAC_BIT_MAC_CSR5_RU) {
+			/* MAC AHB is in suspended state, will return to RX
+			 * descriptor processing when the host changes ownership
+			 * of the descriptor and either an RX poll demand CMD is
+			 * issued or a new frame is recognized by the MAC AHB.
+			 */
+			if (++ru_cnt == 2)
+				owl_emac_dma_cmd_resume_rx(priv);
+
+			recv = owl_emac_rx_process(priv, budget - work_done);
+
+			/* Guard against too many RU interrupts. */
+			if (ru_cnt > 3)
+				break;
+		}
+
+		work_done += recv;
+		if (work_done >= budget)
+			break;
+	}
+
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		owl_emac_irq_enable(priv);
+	}
+
+	/* Reset MAC when getting too many internal TX or RX errors. */
+	if (tx_err_cnt > 10 || rx_err_cnt > 10) {
+		netdev_dbg(priv->netdev, "%s error status: 0x%08x\n",
+			   tx_err_cnt > 10 ? "TX" : "RX", status);
+		rx_err_cnt = 0;
+		tx_err_cnt = 0;
+		schedule_work(&priv->mac_reset_task);
+	}
+
+	return work_done;
+}
+
+static void owl_emac_mdio_clock_enable(struct owl_emac_priv *priv)
+{
+	u32 val;
+
+	/* Enable MDC clock generation by adjusting CLKDIV according to
+	 * the vendor implementation of the original driver.
+	 */
+	val = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10);
+	val &= OWL_EMAC_MSK_MAC_CSR10_CLKDIV;
+	val |= OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128 << OWL_EMAC_OFF_MAC_CSR10_CLKDIV;
+
+	val |= OWL_EMAC_BIT_MAC_CSR10_SB;
+	val |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, val);
+}
+
+static void owl_emac_core_hw_reset(struct owl_emac_priv *priv)
+{
+	/* Trigger hardware reset. */
+	reset_control_assert(priv->reset);
+	usleep_range(10, 20);
+	reset_control_deassert(priv->reset);
+	usleep_range(100, 200);
+}
+
+static int owl_emac_core_sw_reset(struct owl_emac_priv *priv)
+{
+	u32 val;
+	int ret;
+
+	/* Trigger software reset. */
+	owl_emac_reg_set(priv, OWL_EMAC_REG_MAC_CSR0, OWL_EMAC_BIT_MAC_CSR0_SWR);
+	ret = readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR0,
+				 val, !(val & OWL_EMAC_BIT_MAC_CSR0_SWR),
+				 OWL_EMAC_POLL_DELAY_USEC,
+				 OWL_EMAC_RESET_POLL_TIMEOUT_USEC);
+	if (ret)
+		return ret;
+
+	if (priv->phy_mode == PHY_INTERFACE_MODE_RMII) {
+		/* Enable RMII and use the 50MHz rmii clk as output to PHY. */
+		val = 0;
+	} else {
+		/* Enable SMII and use the 125MHz rmii clk as output to PHY.
+		 * Additionally set SMII SYNC delay to 4 half cycle.
+		 */
+		val = 0x04 << OWL_EMAC_OFF_MAC_CTRL_SSDC;
+		val |= OWL_EMAC_BIT_MAC_CTRL_RSIS;
+	}
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CTRL, val);
+
+	/* MDC is disabled after reset. */
+	owl_emac_mdio_clock_enable(priv);
+
+	/* Set FIFO pause & restart threshold levels. */
+	val = 0x40 << OWL_EMAC_OFF_MAC_CSR19_FPTL;
+	val |= 0x10 << OWL_EMAC_OFF_MAC_CSR19_FRTL;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR19, val);
+
+	/* Set flow control pause quanta time to ~100 ms. */
+	val = 0x4FFF << OWL_EMAC_OFF_MAC_CSR18_PQT;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR18, val);
+
+	/* Setup interrupt mitigation. */
+	val = 7 << OWL_EMAC_OFF_MAC_CSR11_NRP;
+	val |= 4 << OWL_EMAC_OFF_MAC_CSR11_RT;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR11, val);
+
+	/* Set RX/TX rings base addresses. */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR3,
+			   (u32)(priv->rx_ring.descs_dma));
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR4,
+			   (u32)(priv->tx_ring.descs_dma));
+
+	/* Setup initial operation mode. */
+	val = OWL_EMAC_VAL_MAC_CSR6_SPEED_100M << OWL_EMAC_OFF_MAC_CSR6_SPEED;
+	val |= OWL_EMAC_BIT_MAC_CSR6_FD;
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_MSK_MAC_CSR6_SPEED |
+			    OWL_EMAC_BIT_MAC_CSR6_FD, val);
+	owl_emac_reg_clear(priv, OWL_EMAC_REG_MAC_CSR6,
+			   OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM);
+
+	priv->link = 0;
+	priv->speed = SPEED_UNKNOWN;
+	priv->duplex = DUPLEX_UNKNOWN;
+	priv->pause = 0;
+	priv->mcaddr_list.count = 0;
+
+	return 0;
+}
+
+static int owl_emac_enable(struct net_device *netdev, bool start_phy)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	owl_emac_dma_cmd_stop(priv);
+	owl_emac_irq_disable(priv);
+	owl_emac_irq_clear(priv);
+
+	owl_emac_ring_prepare_tx(priv);
+	ret = owl_emac_ring_prepare_rx(priv);
+	if (ret)
+		goto err_unprep;
+
+	ret = owl_emac_core_sw_reset(priv);
+	if (ret) {
+		netdev_err(netdev, "failed to soft reset MAC core: %d\n", ret);
+		goto err_unprep;
+	}
+
+	owl_emac_set_hw_mac_addr(netdev);
+	owl_emac_setup_frame_xmit(priv);
+
+	netdev_reset_queue(netdev);
+	napi_enable(&priv->napi);
+
+	owl_emac_irq_enable(priv);
+	owl_emac_dma_cmd_start(priv);
+
+	if (start_phy)
+		phy_start(netdev->phydev);
+
+	netif_start_queue(netdev);
+
+	return 0;
+
+err_unprep:
+	owl_emac_ring_unprepare_rx(priv);
+	owl_emac_ring_unprepare_tx(priv);
+
+	return ret;
+}
+
+static void owl_emac_disable(struct net_device *netdev, bool stop_phy)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	owl_emac_dma_cmd_stop(priv);
+	owl_emac_irq_disable(priv);
+
+	netif_stop_queue(netdev);
+	napi_disable(&priv->napi);
+
+	if (stop_phy)
+		phy_stop(netdev->phydev);
+
+	owl_emac_ring_unprepare_rx(priv);
+	owl_emac_ring_unprepare_tx(priv);
+}
+
+static int owl_emac_ndo_open(struct net_device *netdev)
+{
+	return owl_emac_enable(netdev, true);
+}
+
+static int owl_emac_ndo_stop(struct net_device *netdev)
+{
+	owl_emac_disable(netdev, true);
+
+	return 0;
+}
+
+static void owl_emac_set_multicast(struct net_device *netdev, int count)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct netdev_hw_addr *ha;
+	int index = 0;
+
+	if (count <= 0) {
+		priv->mcaddr_list.count = 0;
+		return;
+	}
+
+	netdev_for_each_mc_addr(ha, netdev) {
+		if (!is_multicast_ether_addr(ha->addr))
+			continue;
+
+		WARN_ON(index >= OWL_EMAC_MAX_MULTICAST_ADDRS);
+		ether_addr_copy(priv->mcaddr_list.addrs[index++], ha->addr);
+	}
+
+	priv->mcaddr_list.count = index;
+
+	owl_emac_setup_frame_xmit(priv);
+}
+
+static void owl_emac_ndo_set_rx_mode(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	u32 status, val = 0;
+	int mcast_count = 0;
+
+	if (netdev->flags & IFF_PROMISC) {
+		val = OWL_EMAC_BIT_MAC_CSR6_PR;
+	} else if (netdev->flags & IFF_ALLMULTI) {
+		val = OWL_EMAC_BIT_MAC_CSR6_PM;
+	} else if (netdev->flags & IFF_MULTICAST) {
+		mcast_count = netdev_mc_count(netdev);
+
+		if (mcast_count > OWL_EMAC_MAX_MULTICAST_ADDRS) {
+			val = OWL_EMAC_BIT_MAC_CSR6_PM;
+			mcast_count = 0;
+		}
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	/* Temporarily stop DMA TX & RX. */
+	status = owl_emac_dma_cmd_stop(priv);
+
+	/* Update operation modes. */
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM,
+			    val);
+
+	/* Restore DMA TX & RX status. */
+	owl_emac_dma_cmd_set(priv, status);
+
+	spin_unlock_bh(&priv->lock);
+
+	/* Set/reset multicast addr list. */
+	owl_emac_set_multicast(netdev, mcast_count);
+}
+
+static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr)
+{
+	struct sockaddr *skaddr = addr;
+
+	if (!is_valid_ether_addr(skaddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (netif_running(netdev))
+		return -EBUSY;
+
+	memcpy(netdev->dev_addr, skaddr->sa_data, netdev->addr_len);
+	owl_emac_set_hw_mac_addr(netdev);
+
+	return owl_emac_setup_frame_xmit(netdev_priv(netdev));
+}
+
+static int owl_emac_ndo_do_ioctl(struct net_device *netdev,
+				 struct ifreq *req, int cmd)
+{
+	if (!netif_running(netdev))
+		return -EINVAL;
+
+	return phy_mii_ioctl(netdev->phydev, req, cmd);
+}
+
+static void owl_emac_ndo_tx_timeout(struct net_device *netdev,
+				    unsigned int txqueue)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	schedule_work(&priv->mac_reset_task);
+}
+
+static void owl_emac_reset_task(struct work_struct *work)
+{
+	struct owl_emac_priv *priv;
+
+	priv = container_of(work, struct owl_emac_priv, mac_reset_task);
+
+	netdev_dbg(priv->netdev, "resetting MAC\n");
+	owl_emac_disable(priv->netdev, false);
+	owl_emac_enable(priv->netdev, false);
+}
+
+static struct net_device_stats *
+owl_emac_ndo_get_stats(struct net_device *netdev)
+{
+	/* FIXME: If possible, try to get stats from MAC hardware registers
+	 * instead of tracking them manually in the driver.
+	 */
+
+	return &netdev->stats;
+}
+
+static const struct net_device_ops owl_emac_netdev_ops = {
+	.ndo_open		= owl_emac_ndo_open,
+	.ndo_stop		= owl_emac_ndo_stop,
+	.ndo_start_xmit		= owl_emac_ndo_start_xmit,
+	.ndo_set_rx_mode	= owl_emac_ndo_set_rx_mode,
+	.ndo_set_mac_address	= owl_emac_ndo_set_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_do_ioctl		= owl_emac_ndo_do_ioctl,
+	.ndo_tx_timeout         = owl_emac_ndo_tx_timeout,
+	.ndo_get_stats		= owl_emac_ndo_get_stats,
+};
+
+static void owl_emac_ethtool_get_drvinfo(struct net_device *dev,
+					 struct ethtool_drvinfo *info)
+{
+	strscpy(info->driver, OWL_EMAC_DRVNAME, sizeof(info->driver));
+}
+
+static u32 owl_emac_ethtool_get_msglevel(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	return priv->msg_enable;
+}
+
+static void owl_emac_ethtool_set_msglevel(struct net_device *ndev, u32 val)
+{
+	struct owl_emac_priv *priv = netdev_priv(ndev);
+
+	priv->msg_enable = val;
+}
+
+static const struct ethtool_ops owl_emac_ethtool_ops = {
+	.get_drvinfo		= owl_emac_ethtool_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_msglevel		= owl_emac_ethtool_get_msglevel,
+	.set_msglevel		= owl_emac_ethtool_set_msglevel,
+};
+
+static int owl_emac_mdio_wait(struct owl_emac_priv *priv)
+{
+	u32 val;
+
+	/* Wait while data transfer is in progress. */
+	return readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR10,
+				  val, !(val & OWL_EMAC_BIT_MAC_CSR10_SB),
+				  OWL_EMAC_POLL_DELAY_USEC,
+				  OWL_EMAC_MDIO_POLL_TIMEOUT_USEC);
+}
+
+static int owl_emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+	struct owl_emac_priv *priv = bus->priv;
+	u32 data, tmp;
+	int ret;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	data = OWL_EMAC_BIT_MAC_CSR10_SB;
+	data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+
+	tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD;
+
+	tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD;
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data);
+
+	ret = owl_emac_mdio_wait(priv);
+	if (ret)
+		return ret;
+
+	data = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10);
+	data &= OWL_EMAC_MSK_MAC_CSR10_DATA;
+
+	return data;
+}
+
+static int
+owl_emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+{
+	struct owl_emac_priv *priv = bus->priv;
+	u32 data, tmp;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	data = OWL_EMAC_BIT_MAC_CSR10_SB;
+	data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+
+	tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD;
+
+	tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD;
+
+	data |= val & OWL_EMAC_MSK_MAC_CSR10_DATA;
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data);
+
+	return owl_emac_mdio_wait(priv);
+}
+
+static int owl_emac_mdio_init(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct device_node *mdio_node;
+	int ret;
+
+	mdio_node = of_get_child_by_name(dev->of_node, "mdio");
+	if (!mdio_node)
+		return -ENODEV;
+
+	if (!of_device_is_available(mdio_node)) {
+		ret = -ENODEV;
+		goto err_put_node;
+	}
+
+	priv->mii = devm_mdiobus_alloc(dev);
+	if (!priv->mii) {
+		ret = -ENOMEM;
+		goto err_put_node;
+	}
+
+	snprintf(priv->mii->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+	priv->mii->name = "owl-emac-mdio";
+	priv->mii->parent = dev;
+	priv->mii->read = owl_emac_mdio_read;
+	priv->mii->write = owl_emac_mdio_write;
+	priv->mii->phy_mask = ~0; /* Mask out all PHYs from auto probing. */
+	priv->mii->priv = priv;
+
+	ret = devm_of_mdiobus_register(dev, priv->mii, mdio_node);
+
+err_put_node:
+	of_node_put(mdio_node);
+	return ret;
+}
+
+static int owl_emac_phy_init(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct phy_device *phy;
+
+	phy = of_phy_get_and_connect(netdev, dev->of_node,
+				     owl_emac_adjust_link);
+	if (!phy)
+		return -ENODEV;
+
+	phy_set_sym_pause(phy, true, true, true);
+
+	if (netif_msg_link(priv))
+		phy_attached_info(phy);
+
+	return 0;
+}
+
+static void owl_emac_get_mac_addr(struct net_device *netdev)
+{
+	struct device *dev = netdev->dev.parent;
+	int ret;
+
+	ret = eth_platform_get_mac_address(dev, netdev->dev_addr);
+	if (!ret && is_valid_ether_addr(netdev->dev_addr))
+		return;
+
+	eth_hw_addr_random(netdev);
+	dev_warn(dev, "using random MAC address %pM\n", netdev->dev_addr);
+}
+
+static __maybe_unused int owl_emac_suspend(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	disable_irq(netdev->irq);
+
+	if (netif_running(netdev)) {
+		owl_emac_disable(netdev, true);
+		netif_device_detach(netdev);
+	}
+
+	clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+
+	return 0;
+}
+
+static __maybe_unused int owl_emac_resume(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	if (netif_running(netdev)) {
+		owl_emac_core_hw_reset(priv);
+		owl_emac_core_sw_reset(priv);
+
+		ret = owl_emac_enable(netdev, true);
+		if (ret) {
+			clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+			return ret;
+		}
+
+		netif_device_attach(netdev);
+	}
+
+	enable_irq(netdev->irq);
+
+	return 0;
+}
+
+static void owl_emac_clk_disable_unprepare(void *data)
+{
+	struct owl_emac_priv *priv = data;
+
+	clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+}
+
+static int owl_emac_clk_set_rate(struct owl_emac_priv *priv)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+	unsigned long rate;
+	int ret;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		rate = 50000000;
+		break;
+
+	case PHY_INTERFACE_MODE_SMII:
+		rate = 125000000;
+		break;
+
+	default:
+		dev_err(dev, "unsupported phy interface mode %d\n",
+			priv->phy_mode);
+		return -EOPNOTSUPP;
+	}
+
+	ret = clk_set_rate(priv->clks[OWL_EMAC_CLK_RMII].clk, rate);
+	if (ret)
+		dev_err(dev, "failed to set RMII clock rate: %d\n", ret);
+
+	return ret;
+}
+
+static int owl_emac_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct net_device *netdev;
+	struct owl_emac_priv *priv;
+	int ret, i;
+
+	netdev = devm_alloc_etherdev(dev, sizeof(*priv));
+	if (!netdev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, netdev);
+	SET_NETDEV_DEV(netdev, dev);
+
+	priv = netdev_priv(netdev);
+	priv->netdev = netdev;
+	priv->msg_enable = netif_msg_init(-1, OWL_EMAC_DEFAULT_MSG_ENABLE);
+
+	ret = of_get_phy_mode(dev->of_node, &priv->phy_mode);
+	if (ret) {
+		dev_err(dev, "failed to get phy mode: %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_init(&priv->lock);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(dev, "unsupported DMA mask\n");
+		return ret;
+	}
+
+	ret = owl_emac_ring_alloc(dev, &priv->rx_ring, OWL_EMAC_RX_RING_SIZE);
+	if (ret)
+		return ret;
+
+	ret = owl_emac_ring_alloc(dev, &priv->tx_ring, OWL_EMAC_TX_RING_SIZE);
+	if (ret)
+		return ret;
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	netdev->irq = platform_get_irq(pdev, 0);
+	if (netdev->irq < 0)
+		return netdev->irq;
+
+	ret = devm_request_irq(dev, netdev->irq, owl_emac_handle_irq,
+			       IRQF_SHARED, netdev->name, netdev);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", netdev->irq);
+		return ret;
+	}
+
+	for (i = 0; i < OWL_EMAC_NCLKS; i++)
+		priv->clks[i].id = owl_emac_clk_names[i];
+
+	ret = devm_clk_bulk_get(dev, OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev, owl_emac_clk_disable_unprepare, priv);
+	if (ret)
+		return ret;
+
+	ret = owl_emac_clk_set_rate(priv);
+	if (ret)
+		return ret;
+
+	priv->reset = devm_reset_control_get_exclusive(dev, NULL);
+	if (IS_ERR(priv->reset))
+		return dev_err_probe(dev, PTR_ERR(priv->reset),
+				     "failed to get reset control");
+
+	owl_emac_get_mac_addr(netdev);
+
+	owl_emac_core_hw_reset(priv);
+	owl_emac_mdio_clock_enable(priv);
+
+	ret = owl_emac_mdio_init(netdev);
+	if (ret) {
+		dev_err(dev, "failed to initialize MDIO bus\n");
+		return ret;
+	}
+
+	ret = owl_emac_phy_init(netdev);
+	if (ret) {
+		dev_err(dev, "failed to initialize PHY\n");
+		return ret;
+	}
+
+	INIT_WORK(&priv->mac_reset_task, owl_emac_reset_task);
+
+	netdev->min_mtu = OWL_EMAC_MTU_MIN;
+	netdev->max_mtu = OWL_EMAC_MTU_MAX;
+	netdev->watchdog_timeo = OWL_EMAC_TX_TIMEOUT;
+	netdev->netdev_ops = &owl_emac_netdev_ops;
+	netdev->ethtool_ops = &owl_emac_ethtool_ops;
+	netif_napi_add(netdev, &priv->napi, owl_emac_poll, NAPI_POLL_WEIGHT);
+
+	ret = devm_register_netdev(dev, netdev);
+	if (ret) {
+		netif_napi_del(&priv->napi);
+		phy_disconnect(netdev->phydev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int owl_emac_remove(struct platform_device *pdev)
+{
+	struct owl_emac_priv *priv = platform_get_drvdata(pdev);
+
+	netif_napi_del(&priv->napi);
+	phy_disconnect(priv->netdev->phydev);
+	cancel_work_sync(&priv->mac_reset_task);
+
+	return 0;
+}
+
+static const struct of_device_id owl_emac_of_match[] = {
+	{ .compatible = "actions,owl-emac", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, owl_emac_of_match);
+
+static SIMPLE_DEV_PM_OPS(owl_emac_pm_ops,
+			 owl_emac_suspend, owl_emac_resume);
+
+static struct platform_driver owl_emac_driver = {
+	.driver = {
+		.name = OWL_EMAC_DRVNAME,
+		.of_match_table = owl_emac_of_match,
+		.pm = &owl_emac_pm_ops,
+	},
+	.probe = owl_emac_probe,
+	.remove = owl_emac_remove,
+};
+module_platform_driver(owl_emac_driver);
+
+MODULE_DESCRIPTION("Actions Semi Owl SoCs Ethernet MAC Driver");
+MODULE_AUTHOR("Actions Semi Inc.");
+MODULE_AUTHOR("Cristian Ciocaltea <cristian.ciocaltea@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/actions/owl-emac.h b/drivers/net/ethernet/actions/owl-emac.h
new file mode 100644
index 000000000000..9eb0d1a30242
--- /dev/null
+++ b/drivers/net/ethernet/actions/owl-emac.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Actions Semi Owl SoCs Ethernet MAC driver
+ *
+ * Copyright (c) 2012 Actions Semi Inc.
+ * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+ */
+
+#ifndef __OWL_EMAC_H__
+#define __OWL_EMAC_H__
+
+#define OWL_EMAC_DRVNAME			"owl-emac"
+
+#define OWL_EMAC_POLL_DELAY_USEC		5
+#define OWL_EMAC_MDIO_POLL_TIMEOUT_USEC		1000
+#define OWL_EMAC_RESET_POLL_TIMEOUT_USEC	2000
+#define OWL_EMAC_TX_TIMEOUT			(2 * HZ)
+
+#define OWL_EMAC_MTU_MIN			ETH_MIN_MTU
+#define OWL_EMAC_MTU_MAX			ETH_DATA_LEN
+#define OWL_EMAC_RX_FRAME_MAX_LEN		(ETH_FRAME_LEN + ETH_FCS_LEN)
+#define OWL_EMAC_SKB_ALIGN			4
+#define OWL_EMAC_SKB_RESERVE			18
+
+#define OWL_EMAC_MAX_MULTICAST_ADDRS		14
+#define OWL_EMAC_SETUP_FRAME_LEN		192
+
+#define OWL_EMAC_RX_RING_SIZE			64
+#define OWL_EMAC_TX_RING_SIZE			32
+
+/* Bus mode register */
+#define OWL_EMAC_REG_MAC_CSR0			0x0000
+#define OWL_EMAC_BIT_MAC_CSR0_SWR		BIT(0)	/* Software reset */
+
+/* Transmit/receive poll demand registers */
+#define OWL_EMAC_REG_MAC_CSR1			0x0008
+#define OWL_EMAC_VAL_MAC_CSR1_TPD		0x01
+#define OWL_EMAC_REG_MAC_CSR2			0x0010
+#define OWL_EMAC_VAL_MAC_CSR2_RPD		0x01
+
+/* Receive/transmit descriptor list base address registers */
+#define OWL_EMAC_REG_MAC_CSR3			0x0018
+#define OWL_EMAC_REG_MAC_CSR4			0x0020
+
+/* Status register */
+#define OWL_EMAC_REG_MAC_CSR5			0x0028
+#define OWL_EMAC_MSK_MAC_CSR5_TS		GENMASK(22, 20)	/* Transmit process state */
+#define OWL_EMAC_OFF_MAC_CSR5_TS		20
+#define OWL_EMAC_VAL_MAC_CSR5_TS_DATA		0x03	/* Transferring data HOST -> FIFO */
+#define OWL_EMAC_VAL_MAC_CSR5_TS_CDES		0x07	/* Closing transmit descriptor */
+#define OWL_EMAC_MSK_MAC_CSR5_RS		GENMASK(19, 17)	/* Receive process state */
+#define OWL_EMAC_OFF_MAC_CSR5_RS		17
+#define OWL_EMAC_VAL_MAC_CSR5_RS_FDES		0x01	/* Fetching receive descriptor */
+#define OWL_EMAC_VAL_MAC_CSR5_RS_CDES		0x05	/* Closing receive descriptor */
+#define OWL_EMAC_VAL_MAC_CSR5_RS_DATA		0x07	/* Transferring data FIFO -> HOST */
+#define OWL_EMAC_BIT_MAC_CSR5_NIS		BIT(16)	/* Normal interrupt summary */
+#define OWL_EMAC_BIT_MAC_CSR5_AIS		BIT(15)	/* Abnormal interrupt summary */
+#define OWL_EMAC_BIT_MAC_CSR5_ERI		BIT(14)	/* Early receive interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_GTE		BIT(11)	/* General-purpose timer expiration */
+#define OWL_EMAC_BIT_MAC_CSR5_ETI		BIT(10)	/* Early transmit interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_RPS		BIT(8)	/* Receive process stopped */
+#define OWL_EMAC_BIT_MAC_CSR5_RU		BIT(7)	/* Receive buffer unavailable */
+#define OWL_EMAC_BIT_MAC_CSR5_RI		BIT(6)	/* Receive interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_UNF		BIT(5)	/* Transmit underflow */
+#define OWL_EMAC_BIT_MAC_CSR5_LCIS		BIT(4)	/* Link change status */
+#define OWL_EMAC_BIT_MAC_CSR5_LCIQ		BIT(3)	/* Link change interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_TU		BIT(2)	/* Transmit buffer unavailable */
+#define OWL_EMAC_BIT_MAC_CSR5_TPS		BIT(1)	/* Transmit process stopped */
+#define OWL_EMAC_BIT_MAC_CSR5_TI		BIT(0)	/* Transmit interrupt */
+
+/* Operation mode register */
+#define OWL_EMAC_REG_MAC_CSR6			0x0030
+#define OWL_EMAC_BIT_MAC_CSR6_RA		BIT(30)	/* Receive all */
+#define OWL_EMAC_BIT_MAC_CSR6_TTM		BIT(22)	/* Transmit threshold mode */
+#define OWL_EMAC_BIT_MAC_CSR6_SF		BIT(21)	/* Store and forward */
+#define OWL_EMAC_MSK_MAC_CSR6_SPEED		GENMASK(17, 16)	/* Eth speed selection */
+#define OWL_EMAC_OFF_MAC_CSR6_SPEED		16
+#define OWL_EMAC_VAL_MAC_CSR6_SPEED_100M	0x00
+#define OWL_EMAC_VAL_MAC_CSR6_SPEED_10M		0x02
+#define OWL_EMAC_BIT_MAC_CSR6_ST		BIT(13)	/* Start/stop transmit command */
+#define OWL_EMAC_BIT_MAC_CSR6_LP		BIT(10)	/* Loopback mode */
+#define OWL_EMAC_BIT_MAC_CSR6_FD		BIT(9)	/* Full duplex mode */
+#define OWL_EMAC_BIT_MAC_CSR6_PM		BIT(7)	/* Pass all multicast */
+#define OWL_EMAC_BIT_MAC_CSR6_PR		BIT(6)	/* Promiscuous mode */
+#define OWL_EMAC_BIT_MAC_CSR6_IF		BIT(4)	/* Inverse filtering */
+#define OWL_EMAC_BIT_MAC_CSR6_PB		BIT(3)	/* Pass bad frames */
+#define OWL_EMAC_BIT_MAC_CSR6_HO		BIT(2)	/* Hash only filtering mode */
+#define OWL_EMAC_BIT_MAC_CSR6_SR		BIT(1)	/* Start/stop receive command */
+#define OWL_EMAC_BIT_MAC_CSR6_HP		BIT(0)	/* Hash/perfect receive filtering mode */
+#define OWL_EMAC_MSK_MAC_CSR6_STSR	       (OWL_EMAC_BIT_MAC_CSR6_ST | \
+						OWL_EMAC_BIT_MAC_CSR6_SR)
+
+/* Interrupt enable register */
+#define OWL_EMAC_REG_MAC_CSR7			0x0038
+#define OWL_EMAC_BIT_MAC_CSR7_NIE		BIT(16)	/* Normal interrupt summary enable */
+#define OWL_EMAC_BIT_MAC_CSR7_AIE		BIT(15)	/* Abnormal interrupt summary enable */
+#define OWL_EMAC_BIT_MAC_CSR7_ERE		BIT(14)	/* Early receive interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_GTE		BIT(11)	/* General-purpose timer overflow */
+#define OWL_EMAC_BIT_MAC_CSR7_ETE		BIT(10)	/* Early transmit interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RSE		BIT(8)	/* Receive stopped enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RUE		BIT(7)	/* Receive buffer unavailable enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RIE		BIT(6)	/* Receive interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_UNE		BIT(5)	/* Underflow interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TUE		BIT(2)	/* Transmit buffer unavailable enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TSE		BIT(1)	/* Transmit stopped enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TIE		BIT(0)	/* Transmit interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE      (OWL_EMAC_BIT_MAC_CSR7_ERE | \
+						OWL_EMAC_BIT_MAC_CSR7_GTE | \
+						OWL_EMAC_BIT_MAC_CSR7_ETE | \
+						OWL_EMAC_BIT_MAC_CSR7_RSE | \
+						OWL_EMAC_BIT_MAC_CSR7_RUE | \
+						OWL_EMAC_BIT_MAC_CSR7_RIE | \
+						OWL_EMAC_BIT_MAC_CSR7_UNE | \
+						OWL_EMAC_BIT_MAC_CSR7_TSE | \
+						OWL_EMAC_BIT_MAC_CSR7_TIE)
+
+/* Missed frames and overflow counter register */
+#define OWL_EMAC_REG_MAC_CSR8			0x0040
+/* MII management and serial ROM register */
+#define OWL_EMAC_REG_MAC_CSR9			0x0048
+
+/* MII serial management register */
+#define OWL_EMAC_REG_MAC_CSR10			0x0050
+#define OWL_EMAC_BIT_MAC_CSR10_SB		BIT(31)	/* Start transfer or busy */
+#define OWL_EMAC_MSK_MAC_CSR10_CLKDIV		GENMASK(30, 28)	/* Clock divider */
+#define OWL_EMAC_OFF_MAC_CSR10_CLKDIV		28
+#define OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128	0x04
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR	0x01	/* Register write command */
+#define OWL_EMAC_OFF_MAC_CSR10_OPCODE		26	/* Operation mode */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_DCG	0x00	/* Disable clock generation */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR	0x01	/* Register write command */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD	0x02	/* Register read command */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS	0x03	/* Clock divider set */
+#define OWL_EMAC_MSK_MAC_CSR10_PHYADD		GENMASK(25, 21)	/* Physical layer address */
+#define OWL_EMAC_OFF_MAC_CSR10_PHYADD		21
+#define OWL_EMAC_MSK_MAC_CSR10_REGADD		GENMASK(20, 16)	/* Register address */
+#define OWL_EMAC_OFF_MAC_CSR10_REGADD		16
+#define OWL_EMAC_MSK_MAC_CSR10_DATA		GENMASK(15, 0)	/* Register data */
+
+/* General-purpose timer and interrupt mitigation control register */
+#define OWL_EMAC_REG_MAC_CSR11			0x0058
+#define OWL_EMAC_OFF_MAC_CSR11_TT		27	/* Transmit timer */
+#define OWL_EMAC_OFF_MAC_CSR11_NTP		24	/* No. of transmit packets */
+#define OWL_EMAC_OFF_MAC_CSR11_RT		20	/* Receive timer */
+#define OWL_EMAC_OFF_MAC_CSR11_NRP		17	/* No. of receive packets */
+
+/* MAC address low/high registers */
+#define OWL_EMAC_REG_MAC_CSR16			0x0080
+#define OWL_EMAC_REG_MAC_CSR17			0x0088
+
+/* Pause time & cache thresholds register */
+#define OWL_EMAC_REG_MAC_CSR18			0x0090
+#define OWL_EMAC_OFF_MAC_CSR18_CPTL		24	/* Cache pause threshold level */
+#define OWL_EMAC_OFF_MAC_CSR18_CRTL		16	/* Cache restart threshold level */
+#define OWL_EMAC_OFF_MAC_CSR18_PQT		0	/* Flow control pause quanta time */
+
+/* FIFO pause & restart threshold register */
+#define OWL_EMAC_REG_MAC_CSR19			0x0098
+#define OWL_EMAC_OFF_MAC_CSR19_FPTL		16	/* FIFO pause threshold level */
+#define OWL_EMAC_OFF_MAC_CSR19_FRTL		0	/* FIFO restart threshold level */
+
+/* Flow control setup & status register */
+#define OWL_EMAC_REG_MAC_CSR20			0x00A0
+#define OWL_EMAC_BIT_MAC_CSR20_FCE		BIT(31)	/* Flow Control Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_TUE		BIT(30)	/* Transmit Un-pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_TPE		BIT(29)	/* Transmit Pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_RPE		BIT(28)	/* Receive Pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_BPE		BIT(27)	/* Back pressure (half-duplex) Enable */
+
+/* MII control register */
+#define OWL_EMAC_REG_MAC_CTRL			0x00B0
+#define OWL_EMAC_BIT_MAC_CTRL_RRSB		BIT(8)	/* RMII_REFCLK select bit */
+#define OWL_EMAC_OFF_MAC_CTRL_SSDC		4	/* SMII SYNC delay cycle */
+#define OWL_EMAC_BIT_MAC_CTRL_RCPS		BIT(1)	/* REF_CLK phase select */
+#define OWL_EMAC_BIT_MAC_CTRL_RSIS		BIT(0)	/* RMII/SMII interface select */
+
+/* Receive descriptor status field */
+#define OWL_EMAC_BIT_RDES0_OWN			BIT(31)	/* Ownership bit */
+#define OWL_EMAC_BIT_RDES0_FF			BIT(30)	/* Filtering fail */
+#define OWL_EMAC_MSK_RDES0_FL			GENMASK(29, 16)	/* Frame length */
+#define OWL_EMAC_OFF_RDES0_FL			16
+#define OWL_EMAC_BIT_RDES0_ES			BIT(15)	/* Error summary */
+#define OWL_EMAC_BIT_RDES0_DE			BIT(14)	/* Descriptor error */
+#define OWL_EMAC_BIT_RDES0_RF			BIT(11)	/* Runt frame */
+#define OWL_EMAC_BIT_RDES0_MF			BIT(10)	/* Multicast frame */
+#define OWL_EMAC_BIT_RDES0_FS			BIT(9)	/* First descriptor */
+#define OWL_EMAC_BIT_RDES0_LS			BIT(8)	/* Last descriptor */
+#define OWL_EMAC_BIT_RDES0_TL			BIT(7)	/* Frame too long */
+#define OWL_EMAC_BIT_RDES0_CS			BIT(6)	/* Collision seen */
+#define OWL_EMAC_BIT_RDES0_FT			BIT(5)	/* Frame type */
+#define OWL_EMAC_BIT_RDES0_RE			BIT(3)	/* Report on MII error */
+#define OWL_EMAC_BIT_RDES0_DB			BIT(2)	/* Dribbling bit */
+#define OWL_EMAC_BIT_RDES0_CE			BIT(1)	/* CRC error */
+#define OWL_EMAC_BIT_RDES0_ZERO			BIT(0)	/* Legal frame length indicator */
+
+/* Receive descriptor control and count field */
+#define OWL_EMAC_BIT_RDES1_RER			BIT(25)	/* Receive end of ring */
+#define OWL_EMAC_MSK_RDES1_RBS1			GENMASK(10, 0) /* Buffer 1 size */
+
+/* Transmit descriptor status field */
+#define OWL_EMAC_BIT_TDES0_OWN			BIT(31)	/* Ownership bit */
+#define OWL_EMAC_BIT_TDES0_ES			BIT(15)	/* Error summary */
+#define OWL_EMAC_BIT_TDES0_LO			BIT(11)	/* Loss of carrier */
+#define OWL_EMAC_BIT_TDES0_NC			BIT(10)	/* No carrier */
+#define OWL_EMAC_BIT_TDES0_LC			BIT(9)	/* Late collision */
+#define OWL_EMAC_BIT_TDES0_EC			BIT(8)	/* Excessive collisions */
+#define OWL_EMAC_MSK_TDES0_CC			GENMASK(6, 3) /* Collision count */
+#define OWL_EMAC_BIT_TDES0_UF			BIT(1)	/* Underflow error */
+#define OWL_EMAC_BIT_TDES0_DE			BIT(0)	/* Deferred */
+
+/* Transmit descriptor control and count field */
+#define OWL_EMAC_BIT_TDES1_IC			BIT(31)	/* Interrupt on completion */
+#define OWL_EMAC_BIT_TDES1_LS			BIT(30)	/* Last descriptor */
+#define OWL_EMAC_BIT_TDES1_FS			BIT(29)	/* First descriptor */
+#define OWL_EMAC_BIT_TDES1_FT1			BIT(28)	/* Filtering type */
+#define OWL_EMAC_BIT_TDES1_SET			BIT(27)	/* Setup packet */
+#define OWL_EMAC_BIT_TDES1_AC			BIT(26)	/* Add CRC disable */
+#define OWL_EMAC_BIT_TDES1_TER			BIT(25)	/* Transmit end of ring */
+#define OWL_EMAC_BIT_TDES1_DPD			BIT(23)	/* Disabled padding */
+#define OWL_EMAC_BIT_TDES1_FT0			BIT(22)	/* Filtering type */
+#define OWL_EMAC_MSK_TDES1_TBS1			GENMASK(10, 0) /* Buffer 1 size */
+
+static const char *const owl_emac_clk_names[] = { "eth", "rmii" };
+#define OWL_EMAC_NCLKS ARRAY_SIZE(owl_emac_clk_names)
+
+enum owl_emac_clk_map {
+	OWL_EMAC_CLK_ETH = 0,
+	OWL_EMAC_CLK_RMII
+};
+
+struct owl_emac_addr_list {
+	u8 addrs[OWL_EMAC_MAX_MULTICAST_ADDRS][ETH_ALEN];
+	int count;
+};
+
+/* TX/RX descriptors */
+struct owl_emac_ring_desc {
+	u32 status;
+	u32 control;
+	u32 buf_addr;
+	u32 reserved;		/* 2nd buffer address is not used */
+};
+
+struct owl_emac_ring {
+	struct owl_emac_ring_desc *descs;
+	dma_addr_t descs_dma;
+	struct sk_buff **skbs;
+	dma_addr_t *skbs_dma;
+	unsigned int size;
+	unsigned int head;
+	unsigned int tail;
+};
+
+struct owl_emac_priv {
+	struct net_device *netdev;
+	void __iomem *base;
+
+	struct clk_bulk_data clks[OWL_EMAC_NCLKS];
+	struct reset_control *reset;
+
+	struct owl_emac_ring rx_ring;
+	struct owl_emac_ring tx_ring;
+
+	struct mii_bus *mii;
+	struct napi_struct napi;
+
+	phy_interface_t phy_mode;
+	unsigned int link;
+	int speed;
+	int duplex;
+	int pause;
+	struct owl_emac_addr_list mcaddr_list;
+
+	struct work_struct mac_reset_task;
+
+	u32 msg_enable;		/* Debug message level */
+	spinlock_t lock;	/* Sync concurrent ring access */
+};
+
+#endif /* __OWL_EMAC_H__ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 343caf41e709..73b03ce59412 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -124,7 +124,7 @@ struct ena_com_io_cq {
 
 	/* holds the number of cdesc of the current packet */
 	u16 cur_rx_pkt_cdesc_count;
-	/* save the firt cdesc idx of the current packet */
+	/* save the first cdesc idx of the current packet */
 	u16 cur_rx_pkt_cdesc_start_idx;
 
 	u16 q_depth;
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index d6cc7aa612b7..2fe7ccee55b2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -251,10 +251,10 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
 		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
 			ena_stats = &ena_stats_tx_strings[j];
 
-			snprintf(*data, ETH_GSTRING_LEN,
-				 "queue_%u_%s_%s", i,
-				 is_xdp ? "xdp_tx" : "tx", ena_stats->name);
-			(*data) += ETH_GSTRING_LEN;
+			ethtool_sprintf(data,
+					"queue_%u_%s_%s", i,
+					is_xdp ? "xdp_tx" : "tx",
+					ena_stats->name);
 		}
 
 		if (!is_xdp) {
@@ -264,9 +264,9 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
 			for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
 				ena_stats = &ena_stats_rx_strings[j];
 
-				snprintf(*data, ETH_GSTRING_LEN,
-					 "queue_%u_rx_%s", i, ena_stats->name);
-				(*data) += ETH_GSTRING_LEN;
+				ethtool_sprintf(data,
+						"queue_%u_rx_%s", i,
+						ena_stats->name);
 			}
 		}
 	}
@@ -280,9 +280,8 @@ static void ena_com_dev_strings(u8 **data)
 	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
 		ena_stats = &ena_stats_ena_com_strings[i];
 
-		snprintf(*data, ETH_GSTRING_LEN,
-			 "ena_admin_q_%s", ena_stats->name);
-		(*data) += ETH_GSTRING_LEN;
+		ethtool_sprintf(data,
+				"ena_admin_q_%s", ena_stats->name);
 	}
 }
 
@@ -295,15 +294,13 @@ static void ena_get_strings(struct ena_adapter *adapter,
 
 	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
 		ena_stats = &ena_stats_global_strings[i];
-		memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
-		data += ETH_GSTRING_LEN;
+		ethtool_sprintf(&data, ena_stats->name);
 	}
 
 	if (eni_stats_needed) {
 		for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) {
 			ena_stats = &ena_stats_eni_strings[i];
-			memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
+			ethtool_sprintf(&data, ena_stats->name);
 		}
 	}
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 3f65f2b370c5..d54375b255dc 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -47,7 +47,7 @@ static void atl1c_down(struct atl1c_adapter *adapter);
 static int atl1c_reset_mac(struct atl1c_hw *hw);
 static void atl1c_reset_dma_ring(struct atl1c_adapter *adapter);
 static int atl1c_configure(struct atl1c_adapter *adapter);
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter);
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode);
 
 
 static const u32 atl1c_default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -470,7 +470,7 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 	adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ?
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
 
-	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD) +
+	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) +
 		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	adapter->rx_frag_size = roundup_pow_of_two(head_size);
 }
@@ -1434,7 +1434,7 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 	atl1c_set_multi(netdev);
 	atl1c_restore_vlan(adapter);
 
-	num = atl1c_alloc_rx_buffer(adapter);
+	num = atl1c_alloc_rx_buffer(adapter, false);
 	if (unlikely(num == 0))
 		return -ENOMEM;
 
@@ -1650,14 +1650,20 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 	skb_checksum_none_assert(skb);
 }
 
-static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
+static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
+				       bool napi_mode)
 {
 	struct sk_buff *skb;
 	struct page *page;
 
-	if (adapter->rx_frag_size > PAGE_SIZE)
-		return netdev_alloc_skb(adapter->netdev,
-					adapter->rx_buffer_len);
+	if (adapter->rx_frag_size > PAGE_SIZE) {
+		if (likely(napi_mode))
+			return napi_alloc_skb(&adapter->napi,
+					      adapter->rx_buffer_len);
+		else
+			return netdev_alloc_skb_ip_align(adapter->netdev,
+							 adapter->rx_buffer_len);
+	}
 
 	page = adapter->rx_page;
 	if (!page) {
@@ -1670,7 +1676,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
 	skb = build_skb(page_address(page) + adapter->rx_page_offset,
 			adapter->rx_frag_size);
 	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD);
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 		adapter->rx_page_offset += adapter->rx_frag_size;
 		if (adapter->rx_page_offset >= PAGE_SIZE)
 			adapter->rx_page = NULL;
@@ -1680,7 +1686,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
 	return skb;
 }
 
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
 {
 	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
 	struct pci_dev *pdev = adapter->pdev;
@@ -1701,7 +1707,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
 	while (next_info->flags & ATL1C_BUFFER_FREE) {
 		rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
 
-		skb = atl1c_alloc_skb(adapter);
+		skb = atl1c_alloc_skb(adapter, napi_mode);
 		if (unlikely(!skb)) {
 			if (netif_msg_rx_err(adapter))
 				dev_warn(&pdev->dev, "alloc rx buffer failed\n");
@@ -1851,13 +1857,13 @@ rrs_checked:
 			vlan = le16_to_cpu(vlan);
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
 		}
-		netif_receive_skb(skb);
+		napi_gro_receive(&adapter->napi, skb);
 
 		(*work_done)++;
 		count++;
 	}
 	if (count)
-		atl1c_alloc_rx_buffer(adapter);
+		atl1c_alloc_rx_buffer(adapter, true);
 }
 
 /**
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 98cf82dea3e4..cbfed1d1477b 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_net.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -53,6 +54,7 @@ struct bcm4908_enet_dma_ring {
 	int length;
 	u16 cfg_block;
 	u16 st_ram_block;
+	struct napi_struct napi;
 
 	union {
 		void *cpu_addr;
@@ -66,8 +68,8 @@ struct bcm4908_enet_dma_ring {
 struct bcm4908_enet {
 	struct device *dev;
 	struct net_device *netdev;
-	struct napi_struct napi;
 	void __iomem *base;
+	int irq_tx;
 
 	struct bcm4908_enet_dma_ring tx_ring;
 	struct bcm4908_enet_dma_ring rx_ring;
@@ -122,24 +124,31 @@ static void enet_umac_set(struct bcm4908_enet *enet, u16 offset, u32 set)
  * Helpers
  */
 
-static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet)
+static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
+	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
 }
 
-static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet)
+/***
+ * DMA ring ops
+ */
+
+static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet,
+					   struct bcm4908_enet_dma_ring *ring)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
 }
 
-static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet)
+static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet,
+					    struct bcm4908_enet_dma_ring *ring)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0);
 }
 
-static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
+static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet,
+					    struct bcm4908_enet_dma_ring *ring)
 {
-	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
 }
 
 /***
@@ -413,11 +422,14 @@ static void bcm4908_enet_gmac_init(struct bcm4908_enet *enet)
 static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id)
 {
 	struct bcm4908_enet *enet = dev_id;
+	struct bcm4908_enet_dma_ring *ring;
 
-	bcm4908_enet_intrs_off(enet);
-	bcm4908_enet_intrs_ack(enet);
+	ring = (irq == enet->irq_tx) ? &enet->tx_ring : &enet->rx_ring;
 
-	napi_schedule(&enet->napi);
+	bcm4908_enet_dma_ring_intrs_off(enet, ring);
+	bcm4908_enet_dma_ring_intrs_ack(enet, ring);
+
+	napi_schedule(&ring->napi);
 
 	return IRQ_HANDLED;
 }
@@ -425,6 +437,8 @@ static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id)
 static int bcm4908_enet_open(struct net_device *netdev)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
+	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
+	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
 	struct device *dev = enet->dev;
 	int err;
 
@@ -434,6 +448,17 @@ static int bcm4908_enet_open(struct net_device *netdev)
 		return err;
 	}
 
+	if (enet->irq_tx > 0) {
+		err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0,
+				  "tx", enet);
+		if (err) {
+			dev_err(dev, "Failed to request IRQ %d: %d\n",
+				enet->irq_tx, err);
+			free_irq(netdev->irq, enet);
+			return err;
+		}
+	}
+
 	bcm4908_enet_gmac_init(enet);
 	bcm4908_enet_dma_reset(enet);
 	bcm4908_enet_dma_init(enet);
@@ -442,14 +467,19 @@ static int bcm4908_enet_open(struct net_device *netdev)
 
 	enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN);
 	enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0);
-	bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
 
-	napi_enable(&enet->napi);
+	if (enet->irq_tx > 0) {
+		napi_enable(&tx_ring->napi);
+		bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring);
+		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
+	}
+
+	bcm4908_enet_dma_rx_ring_enable(enet, rx_ring);
+	napi_enable(&rx_ring->napi);
 	netif_carrier_on(netdev);
 	netif_start_queue(netdev);
-
-	bcm4908_enet_intrs_ack(enet);
-	bcm4908_enet_intrs_on(enet);
+	bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring);
+	bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
 
 	return 0;
 }
@@ -457,16 +487,20 @@ static int bcm4908_enet_open(struct net_device *netdev)
 static int bcm4908_enet_stop(struct net_device *netdev)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
+	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
+	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
 
 	netif_stop_queue(netdev);
 	netif_carrier_off(netdev);
-	napi_disable(&enet->napi);
+	napi_disable(&rx_ring->napi);
+	napi_disable(&tx_ring->napi);
 
 	bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring);
 	bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring);
 
 	bcm4908_enet_dma_uninit(enet);
 
+	free_irq(enet->irq_tx, enet);
 	free_irq(enet->netdev->irq, enet);
 
 	return 0;
@@ -483,25 +517,19 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde
 	u32 tmp;
 
 	/* Free transmitted skbs */
-	while (ring->read_idx != ring->write_idx) {
-		buf_desc = &ring->buf_desc[ring->read_idx];
-		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
-			break;
-		slot = &ring->slots[ring->read_idx];
-
-		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
-		dev_kfree_skb(slot->skb);
-		if (++ring->read_idx == ring->length)
-			ring->read_idx = 0;
-	}
+	if (enet->irq_tx < 0 &&
+	    !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN))
+		napi_schedule(&enet->tx_ring.napi);
 
 	/* Don't use the last empty buf descriptor */
 	if (ring->read_idx <= ring->write_idx)
 		free_buf_descs = ring->read_idx - ring->write_idx + ring->length;
 	else
 		free_buf_descs = ring->read_idx - ring->write_idx;
-	if (free_buf_descs < 2)
+	if (free_buf_descs < 2) {
+		netif_stop_queue(netdev);
 		return NETDEV_TX_BUSY;
+	}
 
 	/* Hardware removes OWN bit after sending data */
 	buf_desc = &ring->buf_desc[ring->write_idx];
@@ -538,9 +566,10 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde
 	return NETDEV_TX_OK;
 }
 
-static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
+static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight)
 {
-	struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi);
+	struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
+	struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring);
 	struct device *dev = enet->dev;
 	int handled = 0;
 
@@ -589,7 +618,7 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 
 	if (handled < weight) {
 		napi_complete_done(napi, handled);
-		bcm4908_enet_intrs_on(enet);
+		bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
 	}
 
 	/* Hardware could disable ring if it run out of descriptors */
@@ -598,6 +627,42 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 	return handled;
 }
 
+static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight)
+{
+	struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
+	struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring);
+	struct bcm4908_enet_dma_ring_bd *buf_desc;
+	struct bcm4908_enet_dma_ring_slot *slot;
+	struct device *dev = enet->dev;
+	unsigned int bytes = 0;
+	int handled = 0;
+
+	while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) {
+		buf_desc = &tx_ring->buf_desc[tx_ring->read_idx];
+		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
+			break;
+		slot = &tx_ring->slots[tx_ring->read_idx];
+
+		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
+		dev_kfree_skb(slot->skb);
+		bytes += slot->len;
+		if (++tx_ring->read_idx == tx_ring->length)
+			tx_ring->read_idx = 0;
+
+		handled++;
+	}
+
+	if (handled < weight) {
+		napi_complete_done(napi, handled);
+		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
+	}
+
+	if (netif_queue_stopped(enet->netdev))
+		netif_wake_queue(enet->netdev);
+
+	return handled;
+}
+
 static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
@@ -620,6 +685,7 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct net_device *netdev;
 	struct bcm4908_enet *enet;
+	const u8 *mac;
 	int err;
 
 	netdev = devm_alloc_etherdev(dev, sizeof(*enet));
@@ -640,6 +706,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	if (netdev->irq < 0)
 		return netdev->irq;
 
+	enet->irq_tx = platform_get_irq_byname(pdev, "tx");
+
 	dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
 
 	err = bcm4908_enet_dma_alloc(enet);
@@ -647,12 +715,17 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 		return err;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	eth_hw_addr_random(netdev);
+	mac = of_get_mac_address(dev->of_node);
+	if (!IS_ERR(mac))
+		ether_addr_copy(netdev->dev_addr, mac);
+	else
+		eth_hw_addr_random(netdev);
 	netdev->netdev_ops = &bcm4908_enet_netdev_ops;
 	netdev->min_mtu = ETH_ZLEN;
 	netdev->mtu = ETH_DATA_LEN;
 	netdev->max_mtu = ENET_MTU_MAX;
-	netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64);
+	netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT);
+	netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT);
 
 	err = register_netdev(netdev);
 	if (err) {
@@ -670,7 +743,8 @@ static int bcm4908_enet_remove(struct platform_device *pdev)
 	struct bcm4908_enet *enet = platform_get_drvdata(pdev);
 
 	unregister_netdev(enet->netdev);
-	netif_napi_del(&enet->napi);
+	netif_napi_del(&enet->rx_ring.napi);
+	netif_napi_del(&enet->tx_ring.napi);
 	bcm4908_enet_dma_free(enet);
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b652ed72a621..56801387591d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1395,7 +1395,6 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt)
 	u32 op_gen_command = 0;
 	u32 comp_addr = BAR_CSTRORM_INTMEM +
 			CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func);
-	int ret = 0;
 
 	if (REG_RD(bp, comp_addr)) {
 		BNX2X_ERR("Cleanup complete was not 0 before sending\n");
@@ -1420,7 +1419,7 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt)
 	/* Zero completion for next FLR */
 	REG_WR(bp, comp_addr, 0);
 
-	return ret;
+	return 0;
 }
 
 u8 bnx2x_is_pcie_pending(struct pci_dev *dev)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b53a0d87371a..6f13642121c4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4470,7 +4470,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	writel(1, bp->bar0 + doorbell_offset);
 
 	if (!pci_is_enabled(bp->pdev))
-		return 0;
+		return -ENODEV;
 
 	if (!timeout)
 		timeout = DFLT_HWRM_CMD_TIMEOUT;
@@ -4500,12 +4500,15 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
 				return -EBUSY;
 			/* on first few passes, just barely sleep */
-			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
-			else
+			} else {
+				if (HWRM_WAIT_MUST_ABORT(bp, req))
+					break;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
+			}
 		}
 
 		if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
@@ -4530,15 +4533,19 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			if (len)
 				break;
 			/* on first few passes, just barely sleep */
-			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
-			else
+			} else {
+				if (HWRM_WAIT_MUST_ABORT(bp, req))
+					goto timeout_abort;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
+			}
 		}
 
 		if (i >= tmo_count) {
+timeout_abort:
 			if (!silent)
 				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
 					   HWRM_TOTAL_TIMEOUT(i),
@@ -7540,6 +7547,32 @@ static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg)
 					 BNXT_FW_HEALTH_WIN_MAP_OFF);
 }
 
+bool bnxt_is_fw_healthy(struct bnxt *bp)
+{
+	if (bp->fw_health && bp->fw_health->status_reliable) {
+		u32 fw_status;
+
+		fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+		if (fw_status && !BNXT_FW_IS_HEALTHY(fw_status))
+			return false;
+	}
+
+	return true;
+}
+
+static void bnxt_inv_fw_health_reg(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 reg_type;
+
+	if (!fw_health || !fw_health->status_reliable)
+		return;
+
+	reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]);
+	if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC)
+		fw_health->status_reliable = false;
+}
+
 static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 {
 	void __iomem *hs;
@@ -7547,6 +7580,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 	u32 reg_type;
 	u32 sig;
 
+	if (bp->fw_health)
+		bp->fw_health->status_reliable = false;
+
 	__bnxt_map_fw_health_reg(bp, HCOMM_STATUS_STRUCT_LOC);
 	hs = bp->bar0 + BNXT_FW_HEALTH_WIN_OFF(HCOMM_STATUS_STRUCT_LOC);
 
@@ -7558,11 +7594,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 					     BNXT_FW_HEALTH_WIN_BASE +
 					     BNXT_GRC_REG_CHIP_NUM);
 		}
-		if (!BNXT_CHIP_P5(bp)) {
-			if (bp->fw_health)
-				bp->fw_health->status_reliable = false;
+		if (!BNXT_CHIP_P5(bp))
 			return;
-		}
+
 		status_loc = BNXT_GRC_REG_STATUS_P5 |
 			     BNXT_FW_HEALTH_REG_TYPE_BAR0;
 	} else {
@@ -7592,6 +7626,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
 	u32 reg_base = 0xffffffff;
 	int i;
 
+	bp->fw_health->status_reliable = false;
 	/* Only pre-map the monitoring GRC registers using window 3 */
 	for (i = 0; i < 4; i++) {
 		u32 reg = fw_health->regs[i];
@@ -7604,6 +7639,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
 			return -ERANGE;
 		fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg);
 	}
+	bp->fw_health->status_reliable = true;
 	if (reg_base == 0xffffffff)
 		return 0;
 
@@ -9494,9 +9530,10 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
 
 		mutex_lock(&bp->hwrm_cmd_lock);
 		do {
-			rc = __bnxt_hwrm_ver_get(bp, true);
 			sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
-			if (!sts || !BNXT_FW_IS_BOOTING(sts))
+			rc = __bnxt_hwrm_ver_get(bp, true);
+			if (!sts || (!BNXT_FW_IS_BOOTING(sts) &&
+				     !BNXT_FW_IS_RECOVERING(sts)))
 				break;
 			retry++;
 		} while (rc == -EBUSY && retry < BNXT_FW_RETRY);
@@ -9556,13 +9593,17 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 	if (rc)
 		return rc;
 
-	if (!up)
+	if (!up) {
+		bnxt_inv_fw_health_reg(bp);
 		return 0;
+	}
 
 	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
 		resc_reinit = true;
 	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
 		fw_reset = true;
+	else if (bp->fw_health && !bp->fw_health->status_reliable)
+		bnxt_try_map_fw_health_reg(bp);
 
 	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
 		netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
@@ -9571,6 +9612,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 	}
 	if (resc_reinit || fw_reset) {
 		if (fw_reset) {
+			set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 			if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 				bnxt_ulp_stop(bp);
 			bnxt_free_ctx_mem(bp);
@@ -9579,21 +9621,25 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 			bnxt_dcb_free(bp);
 			rc = bnxt_fw_init_one(bp);
 			if (rc) {
+				clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 				set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
 				return rc;
 			}
 			bnxt_clear_int_mode(bp);
 			rc = bnxt_init_int_mode(bp);
 			if (rc) {
+				clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 				netdev_err(bp->dev, "init int mode failed\n");
 				return rc;
 			}
-			set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 		}
 		if (BNXT_NEW_RM(bp)) {
 			struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
 			rc = bnxt_hwrm_func_resc_qcaps(bp, true);
+			if (rc)
+				netdev_err(bp->dev, "resc_qcaps failed\n");
+
 			hw_resc->resv_cp_rings = 0;
 			hw_resc->resv_stat_ctxs = 0;
 			hw_resc->resv_irqs = 0;
@@ -9607,7 +9653,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 			}
 		}
 	}
-	return 0;
+	return rc;
 }
 
 static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
@@ -11640,7 +11686,7 @@ static void bnxt_reset_all(struct bnxt *bp)
 		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
 		req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
 		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-		if (rc)
+		if (rc != -ENODEV)
 			netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc);
 	}
 	bp->fw_reset_timestamp = jiffies;
@@ -11723,28 +11769,20 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
 		return;
 	case BNXT_FW_RESET_STATE_ENABLE_DEV:
-		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
-			u32 val;
-
-			if (!bp->fw_reset_min_dsecs) {
-				u16 val;
-
-				pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID,
-						     &val);
-				if (val == 0xffff) {
-					if (bnxt_fw_reset_timeout(bp)) {
-						netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n");
-						goto fw_reset_abort;
-					}
-					bnxt_queue_fw_reset_work(bp, HZ / 1000);
-					return;
+		bnxt_inv_fw_health_reg(bp);
+		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
+		    !bp->fw_reset_min_dsecs) {
+			u16 val;
+
+			pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val);
+			if (val == 0xffff) {
+				if (bnxt_fw_reset_timeout(bp)) {
+					netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n");
+					goto fw_reset_abort;
 				}
+				bnxt_queue_fw_reset_work(bp, HZ / 1000);
+				return;
 			}
-			val = bnxt_fw_health_readl(bp,
-						   BNXT_FW_RESET_INPROG_REG);
-			if (val)
-				netdev_warn(bp->dev, "FW reset inprog %x after min wait time.\n",
-					    val);
 		}
 		clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
 		if (pci_enable_device(bp->pdev)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1259e68cba2a..29061c577baa 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -671,6 +671,10 @@ struct nqe_cn {
 #define HWRM_MIN_TIMEOUT		25
 #define HWRM_MAX_TIMEOUT		40
 
+#define HWRM_WAIT_MUST_ABORT(bp, req)					\
+	(le16_to_cpu((req)->req_type) != HWRM_VER_GET &&		\
+	 !bnxt_is_fw_healthy(bp))
+
 #define HWRM_TOTAL_TIMEOUT(n)	(((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?	\
 	((n) * HWRM_SHORT_MIN_TIMEOUT) :				\
 	(HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +		\
@@ -1560,6 +1564,7 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_STATUS_HEALTH_MSK	0xffff
 #define BNXT_FW_STATUS_HEALTHY		0x8000
 #define BNXT_FW_STATUS_SHUTDOWN		0x100000
+#define BNXT_FW_STATUS_RECOVERING	0x400000
 
 #define BNXT_FW_IS_HEALTHY(sts)		(((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\
 					 BNXT_FW_STATUS_HEALTHY)
@@ -1570,6 +1575,9 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_IS_ERR(sts)		(((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \
 					 BNXT_FW_STATUS_HEALTHY)
 
+#define BNXT_FW_IS_RECOVERING(sts)	(BNXT_FW_IS_ERR(sts) &&		       \
+					 ((sts) & BNXT_FW_STATUS_RECOVERING))
+
 #define BNXT_FW_RETRY			5
 #define BNXT_FW_IF_RETRY		10
 
@@ -2228,6 +2236,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
+bool bnxt_is_fw_healthy(struct bnxt *bp);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 588c4804d10a..265c2fa6bbe0 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -524,6 +524,68 @@ bnad_set_pauseparam(struct net_device *netdev,
 	return 0;
 }
 
+static void bnad_get_txf_strings(u8 **string, int f_num)
+{
+	ethtool_sprintf(string, "txf%d_ucast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_ucast", f_num);
+	ethtool_sprintf(string, "txf%d_ucast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_mcast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_mcast", f_num);
+	ethtool_sprintf(string, "txf%d_mcast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_bcast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_bcast", f_num);
+	ethtool_sprintf(string, "txf%d_bcast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_errors", f_num);
+	ethtool_sprintf(string, "txf%d_filter_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_filter_mac_sa", f_num);
+}
+
+static void bnad_get_rxf_strings(u8 **string, int f_num)
+{
+	ethtool_sprintf(string, "rxf%d_ucast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_ucast", f_num);
+	ethtool_sprintf(string, "rxf%d_ucast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_frame_drops", f_num);
+}
+
+static void bnad_get_cq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "cq%d_producer_index", q_num);
+	ethtool_sprintf(string, "cq%d_consumer_index", q_num);
+	ethtool_sprintf(string, "cq%d_hw_producer_index", q_num);
+	ethtool_sprintf(string, "cq%d_intr", q_num);
+	ethtool_sprintf(string, "cq%d_poll", q_num);
+	ethtool_sprintf(string, "cq%d_schedule", q_num);
+	ethtool_sprintf(string, "cq%d_keep_poll", q_num);
+	ethtool_sprintf(string, "cq%d_complete", q_num);
+}
+
+static void bnad_get_rxq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "rxq%d_packets", q_num);
+	ethtool_sprintf(string, "rxq%d_bytes", q_num);
+	ethtool_sprintf(string, "rxq%d_packets_with_error", q_num);
+	ethtool_sprintf(string, "rxq%d_allocbuf_failed", q_num);
+	ethtool_sprintf(string, "rxq%d_mapbuf_failed", q_num);
+	ethtool_sprintf(string, "rxq%d_producer_index", q_num);
+	ethtool_sprintf(string, "rxq%d_consumer_index", q_num);
+}
+
+static void bnad_get_txq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "txq%d_packets", q_num);
+	ethtool_sprintf(string, "txq%d_bytes", q_num);
+	ethtool_sprintf(string, "txq%d_producer_index", q_num);
+	ethtool_sprintf(string, "txq%d_consumer_index", q_num);
+	ethtool_sprintf(string, "txq%d_hw_consumer_index", q_num);
+}
+
 static void
 bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
 {
@@ -531,175 +593,57 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
 	int i, j, q_num;
 	u32 bmap;
 
+	if (stringset != ETH_SS_STATS)
+		return;
+
 	mutex_lock(&bnad->conf_mutex);
 
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) {
-			BUG_ON(!(strlen(bnad_net_stats_strings[i]) <
-				   ETH_GSTRING_LEN));
-			strncpy(string, bnad_net_stats_strings[i],
-				ETH_GSTRING_LEN);
-			string += ETH_GSTRING_LEN;
-		}
-		bmap = bna_tx_rid_mask(&bnad->bna);
-		for (i = 0; bmap; i++) {
-			if (bmap & 1) {
-				sprintf(string, "txf%d_ucast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_ucast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_ucast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_errors", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_filter_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_filter_mac_sa", i);
-				string += ETH_GSTRING_LEN;
-			}
-			bmap >>= 1;
-		}
+	for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) {
+		BUG_ON(!(strlen(bnad_net_stats_strings[i]) < ETH_GSTRING_LEN));
+		ethtool_sprintf(&string, bnad_net_stats_strings[i]);
+	}
 
-		bmap = bna_rx_rid_mask(&bnad->bna);
-		for (i = 0; bmap; i++) {
-			if (bmap & 1) {
-				sprintf(string, "rxf%d_ucast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_ucast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_ucast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_frame_drops", i);
-				string += ETH_GSTRING_LEN;
-			}
-			bmap >>= 1;
-		}
+	bmap = bna_tx_rid_mask(&bnad->bna);
+	for (i = 0; bmap; i++) {
+		if (bmap & 1)
+			bnad_get_txf_strings(&string, i);
+		bmap >>= 1;
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_rx; i++) {
-			if (!bnad->rx_info[i].rx)
-				continue;
-			for (j = 0; j < bnad->num_rxp_per_rx; j++) {
-				sprintf(string, "cq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_hw_producer_index",
-					q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_intr", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_poll", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_schedule", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_keep_poll", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_complete", q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-			}
-		}
+	bmap = bna_rx_rid_mask(&bnad->bna);
+	for (i = 0; bmap; i++, bmap >>= 1) {
+		if (bmap & 1)
+			bnad_get_rxf_strings(&string, i);
+		bmap >>= 1;
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_rx; i++) {
-			if (!bnad->rx_info[i].rx)
-				continue;
-			for (j = 0; j < bnad->num_rxp_per_rx; j++) {
-				sprintf(string, "rxq%d_packets", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_bytes", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_packets_with_error",
-								q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_allocbuf_failed", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_mapbuf_failed", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-				if (bnad->rx_info[i].rx_ctrl[j].ccb &&
-					bnad->rx_info[i].rx_ctrl[j].ccb->
-					rcb[1] &&
-					bnad->rx_info[i].rx_ctrl[j].ccb->
-					rcb[1]->rxq) {
-					sprintf(string, "rxq%d_packets", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_bytes", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string,
-					"rxq%d_packets_with_error", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_allocbuf_failed",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_mapbuf_failed",
-						q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_producer_index",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_consumer_index",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					q_num++;
-				}
-			}
-		}
+	q_num = 0;
+	for (i = 0; i < bnad->num_rx; i++) {
+		if (!bnad->rx_info[i].rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++)
+			bnad_get_cq_strings(&string, q_num++);
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_tx; i++) {
-			if (!bnad->tx_info[i].tx)
-				continue;
-			for (j = 0; j < bnad->num_txq_per_tx; j++) {
-				sprintf(string, "txq%d_packets", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_bytes", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_hw_consumer_index",
-									q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-			}
+	q_num = 0;
+	for (i = 0; i < bnad->num_rx; i++) {
+		if (!bnad->rx_info[i].rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++) {
+			bnad_get_rxq_strings(&string, q_num++);
+			if (bnad->rx_info[i].rx_ctrl[j].ccb &&
+			    bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] &&
+			    bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq)
+				bnad_get_rxq_strings(&string, q_num++);
 		}
+	}
 
-		break;
-
-	default:
-		break;
+	q_num = 0;
+	for (i = 0; i < bnad->num_tx; i++) {
+		if (!bnad->tx_info[i].tx)
+			continue;
+		for (j = 0; j < bnad->num_txq_per_tx; j++)
+			bnad_get_txq_strings(&string, q_num++);
 	}
 
 	mutex_unlock(&bnad->conf_mutex);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index d8c68906525a..d8d87213697c 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -159,6 +159,16 @@
 #define GEM_PEFTN		0x01f4 /* PTP Peer Event Frame Tx Ns */
 #define GEM_PEFRSL		0x01f8 /* PTP Peer Event Frame Rx Sec Low */
 #define GEM_PEFRN		0x01fc /* PTP Peer Event Frame Rx Ns */
+#define GEM_PCSCNTRL		0x0200 /* PCS Control */
+#define GEM_PCSSTS		0x0204 /* PCS Status */
+#define GEM_PCSPHYTOPID		0x0208 /* PCS PHY Top ID */
+#define GEM_PCSPHYBOTID		0x020c /* PCS PHY Bottom ID */
+#define GEM_PCSANADV		0x0210 /* PCS AN Advertisement */
+#define GEM_PCSANLPBASE		0x0214 /* PCS AN Link Partner Base */
+#define GEM_PCSANEXP		0x0218 /* PCS AN Expansion */
+#define GEM_PCSANNPTX		0x021c /* PCS AN Next Page TX */
+#define GEM_PCSANNPLP		0x0220 /* PCS AN Next Page LP */
+#define GEM_PCSANEXTSTS		0x023c /* PCS AN Extended Status */
 #define GEM_DCFG1		0x0280 /* Design Config 1 */
 #define GEM_DCFG2		0x0284 /* Design Config 2 */
 #define GEM_DCFG3		0x0288 /* Design Config 3 */
@@ -478,6 +488,10 @@
 #define GEM_HS_MAC_SPEED_OFFSET			0
 #define GEM_HS_MAC_SPEED_SIZE			3
 
+/* Bitfields in PCSCNTRL */
+#define GEM_PCSAUTONEG_OFFSET			12
+#define GEM_PCSAUTONEG_SIZE			1
+
 /* Bitfields in DCFG1. */
 #define GEM_IRQCOR_OFFSET			23
 #define GEM_IRQCOR_SIZE				1
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 15362d016a87..f56f3dbbc015 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -694,6 +694,22 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 	if (old_ncr ^ ncr)
 		macb_or_gem_writel(bp, NCR, ncr);
 
+	/* Disable AN for SGMII fixed link configuration, enable otherwise.
+	 * Must be written after PCSSEL is set in NCFGR,
+	 * otherwise writes will not take effect.
+	 */
+	if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) {
+		u32 pcsctrl, old_pcsctrl;
+
+		old_pcsctrl = gem_readl(bp, PCSCNTRL);
+		if (mode == MLO_AN_FIXED)
+			pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG);
+		else
+			pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG);
+		if (old_pcsctrl != pcsctrl)
+			gem_writel(bp, PCSCNTRL, pcsctrl);
+	}
+
 	spin_unlock_irqrestore(&bp->lock, flags);
 }
 
@@ -847,6 +863,15 @@ static int macb_phylink_connect(struct macb *bp)
 	return 0;
 }
 
+static void macb_get_pcs_fixed_state(struct phylink_config *config,
+				     struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(ndev);
+
+	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
+}
+
 /* based on au1000_eth. c*/
 static int macb_mii_probe(struct net_device *dev)
 {
@@ -855,6 +880,11 @@ static int macb_mii_probe(struct net_device *dev)
 	bp->phylink_config.dev = &dev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+		bp->phylink_config.poll_fixed_state = true;
+		bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state;
+	}
+
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
@@ -3728,17 +3758,15 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 		*hclk = devm_clk_get(&pdev->dev, "hclk");
 	}
 
-	if (IS_ERR_OR_NULL(*pclk)) {
-		err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV;
-		dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR_OR_NULL(*pclk))
+		return dev_err_probe(&pdev->dev,
+				     IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV,
+				     "failed to get pclk\n");
 
-	if (IS_ERR_OR_NULL(*hclk)) {
-		err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV;
-		dev_err(&pdev->dev, "failed to get hclk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR_OR_NULL(*hclk))
+		return dev_err_probe(&pdev->dev,
+				     IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV,
+				     "failed to get hclk\n");
 
 	*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
 	if (IS_ERR(*tx_clk))
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 6c85a10f465c..d2ba40c19696 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -3536,8 +3536,7 @@ out:
 	}
 
 out_free:
-	if (data)
-		kvfree(data);
+	kvfree(data);
 
 #undef QDESC_GET_FLQ
 #undef QDESC_GET_RXQ
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 83b46440408b..b1cae5a19839 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -979,7 +979,7 @@ void clear_filter(struct adapter *adap, struct filter_entry *f)
 {
 	struct port_info *pi = netdev_priv(f->dev);
 
-	/* If the new or old filter have loopback rewriteing rules then we'll
+	/* If the new or old filter have loopback rewriting rules then we'll
 	 * need to free any existing L2T, SMT, CLIP entries of filter
 	 * rule.
 	 */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
index 2e309f6673f7..28fd2de9e4cf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
@@ -48,6 +48,11 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev,
 	flow_action_for_each(i, entry, actions) {
 		switch (entry->id) {
 		case FLOW_ACTION_POLICE:
+			if (entry->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			/* Convert bytes per second to bits per second */
 			if (entry->police.rate_bytes_ps * 8 > max_link_rate) {
 				NL_SET_ERR_MSG_MOD(extack,
@@ -145,7 +150,11 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev,
 	flow_action_for_each(i, entry, &cls->rule->action)
 		if (entry->id == FLOW_ACTION_POLICE)
 			break;
-
+	if (entry->police.rate_pkt_ps) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "QoS offload not support packets per second");
+		return -EOPNOTSUPP;
+	}
 	/* Convert from bytes per second to Kbps */
 	p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000);
 	p.u.params.channel = pi->tx_chan;
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 473b337b2e3b..5a1a8f2ea63c 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1177,18 +1177,7 @@ static struct platform_driver ftmac100_driver = {
 /******************************************************************************
  * initialization / finalization
  *****************************************************************************/
-static int __init ftmac100_init(void)
-{
-	return platform_driver_register(&ftmac100_driver);
-}
-
-static void __exit ftmac100_exit(void)
-{
-	platform_driver_unregister(&ftmac100_driver);
-}
-
-module_init(ftmac100_init);
-module_exit(ftmac100_exit);
+module_platform_driver(ftmac100_driver);
 
 MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>");
 MODULE_DESCRIPTION("FTMAC100 driver");
diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
index ee7a906e30b3..d029b69c3f18 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -29,3 +29,11 @@ config FSL_DPAA2_PTP_CLOCK
 	help
 	  This driver adds support for using the DPAA2 1588 timer module
 	  as a PTP clock.
+
+config FSL_DPAA2_SWITCH
+	tristate "Freescale DPAA2 Ethernet Switch"
+	depends on BRIDGE || BRIDGE=n
+	depends on NET_SWITCHDEV
+	help
+	  Driver for Freescale DPAA2 Ethernet Switch. This driver manages
+	  switch objects discovered on the Freeescale MC bus.
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index 146cb3540e61..644ef9ae02a3 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -5,11 +5,13 @@
 
 obj-$(CONFIG_FSL_DPAA2_ETH)		+= fsl-dpaa2-eth.o
 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)	+= fsl-dpaa2-ptp.o
+obj-$(CONFIG_FSL_DPAA2_SWITCH)		+= fsl-dpaa2-switch.o
 
 fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
+fsl-dpaa2-switch-objs	:= dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
index 0af2e9914ec4..70e04321c420 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
@@ -9,7 +9,7 @@
 
 #include <linux/ethtool.h>
 
-#include "ethsw.h"
+#include "dpaa2-switch.h"
 
 static struct {
 	enum dpsw_counter id;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
new file mode 100644
index 000000000000..a9b30a72ddad
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -0,0 +1,3031 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DPAA2 Ethernet Switch driver
+ *
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/iommu.h>
+
+#include <linux/fsl/mc.h>
+
+#include "dpaa2-switch.h"
+
+/* Minimal supported DPSW version */
+#define DPSW_MIN_VER_MAJOR		8
+#define DPSW_MIN_VER_MINOR		9
+
+#define DEFAULT_VLAN_ID			1
+
+static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
+{
+	return port_priv->fdb->fdb_id;
+}
+
+static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->fdbs[i].in_use)
+			return &ethsw->fdbs[i];
+	return NULL;
+}
+
+static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
+				     struct net_device *bridge_dev)
+{
+	struct ethsw_port_priv *other_port_priv = NULL;
+	struct dpaa2_switch_fdb *fdb;
+	struct net_device *other_dev;
+	struct list_head *iter;
+
+	/* If we leave a bridge (bridge_dev is NULL), find an unused
+	 * FDB and use that.
+	 */
+	if (!bridge_dev) {
+		fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
+
+		/* If there is no unused FDB, we must be the last port that
+		 * leaves the last bridge, all the others are standalone. We
+		 * can just keep the FDB that we already have.
+		 */
+
+		if (!fdb) {
+			port_priv->fdb->bridge_dev = NULL;
+			return 0;
+		}
+
+		port_priv->fdb = fdb;
+		port_priv->fdb->in_use = true;
+		port_priv->fdb->bridge_dev = NULL;
+		return 0;
+	}
+
+	/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
+	 * being held. Assert on it so that it's easier to catch new code
+	 * paths that reach this point without the RTNL lock.
+	 */
+	ASSERT_RTNL();
+
+	/* If part of a bridge, use the FDB of the first dpaa2 switch interface
+	 * to be present in that bridge
+	 */
+	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev))
+			continue;
+
+		if (other_dev == port_priv->netdev)
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		break;
+	}
+
+	/* The current port is about to change its FDB to the one used by the
+	 * first port that joined the bridge.
+	 */
+	if (other_port_priv) {
+		/* The previous FDB is about to become unused, since the
+		 * interface is no longer standalone.
+		 */
+		port_priv->fdb->in_use = false;
+		port_priv->fdb->bridge_dev = NULL;
+
+		/* Get a reference to the new FDB */
+		port_priv->fdb = other_port_priv->fdb;
+	}
+
+	/* Keep track of the new upper bridge device */
+	port_priv->fdb->bridge_dev = bridge_dev;
+
+	return 0;
+}
+
+static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
+					   enum dpsw_flood_type type,
+					   struct dpsw_egress_flood_cfg *cfg)
+{
+	int i = 0, j;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	/* Add all the DPAA2 switch ports found in the same bridging domain to
+	 * the egress flooding domain
+	 */
+	for (j = 0; j < ethsw->sw_attr.num_ifs; j++) {
+		if (!ethsw->ports[j])
+			continue;
+		if (ethsw->ports[j]->fdb->fdb_id != fdb_id)
+			continue;
+
+		if (type == DPSW_BROADCAST && ethsw->ports[j]->bcast_flood)
+			cfg->if_id[i++] = ethsw->ports[j]->idx;
+		else if (type == DPSW_FLOODING && ethsw->ports[j]->ucast_flood)
+			cfg->if_id[i++] = ethsw->ports[j]->idx;
+	}
+
+	/* Add the CTRL interface to the egress flooding domain */
+	cfg->if_id[i++] = ethsw->sw_attr.num_ifs;
+
+	cfg->fdb_id = fdb_id;
+	cfg->flood_type = type;
+	cfg->num_ifs = i;
+}
+
+static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+{
+	struct dpsw_egress_flood_cfg flood_cfg;
+	int err;
+
+	/* Setup broadcast flooding domain */
+	dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_BROADCAST, &flood_cfg);
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	/* Setup unknown flooding domain */
+	dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_FLOODING, &flood_cfg);
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
+				dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
+static int dpaa2_switch_add_vlan(struct ethsw_port_priv *port_priv, u16 vid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_vlan_cfg vcfg = {0};
+	int err;
+
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_vlan_add(ethsw->mc_io, 0,
+			    ethsw->dpsw_handle, vid, &vcfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err);
+		return err;
+	}
+	ethsw->vlans[vid] = ETHSW_VLAN_MEMBER;
+
+	return 0;
+}
+
+static bool dpaa2_switch_port_is_up(struct ethsw_port_priv *port_priv)
+{
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_link_state state;
+	int err;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx, &state);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
+		return true;
+	}
+
+	WARN_ONCE(state.up > 1, "Garbage read into link_state");
+
+	return state.up ? true : false;
+}
+
+static int dpaa2_switch_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_tci_cfg tci_cfg = { 0 };
+	bool up;
+	int err, ret;
+
+	err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      port_priv->idx, &tci_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_tci err %d\n", err);
+		return err;
+	}
+
+	tci_cfg.vlan_id = pvid;
+
+	/* Interface needs to be down to change PVID */
+	up = dpaa2_switch_port_is_up(port_priv);
+	if (up) {
+		err = dpsw_if_disable(ethsw->mc_io, 0,
+				      ethsw->dpsw_handle,
+				      port_priv->idx);
+		if (err) {
+			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+			return err;
+		}
+	}
+
+	err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      port_priv->idx, &tci_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_set_tci err %d\n", err);
+		goto set_tci_error;
+	}
+
+	/* Delete previous PVID info and mark the new one */
+	port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID;
+	port_priv->vlans[pvid] |= ETHSW_VLAN_PVID;
+	port_priv->pvid = pvid;
+
+set_tci_error:
+	if (up) {
+		ret = dpsw_if_enable(ethsw->mc_io, 0,
+				     ethsw->dpsw_handle,
+				     port_priv->idx);
+		if (ret) {
+			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
+			return ret;
+		}
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
+				      u16 vid, u16 flags)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	int err;
+
+	if (port_priv->vlans[vid]) {
+		netdev_warn(netdev, "VLAN %d already configured\n", vid);
+		return -EEXIST;
+	}
+
+	/* If hit, this VLAN rule will lead the packet into the FDB table
+	 * specified in the vlan configuration below
+	 */
+	vcfg.num_ifs = 1;
+	vcfg.if_id[0] = port_priv->idx;
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	vcfg.options |= DPSW_VLAN_ADD_IF_OPT_FDB_ID;
+	err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err);
+		return err;
+	}
+
+	port_priv->vlans[vid] = ETHSW_VLAN_MEMBER;
+
+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
+		err = dpsw_vlan_add_if_untagged(ethsw->mc_io, 0,
+						ethsw->dpsw_handle,
+						vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_add_if_untagged err %d\n", err);
+			return err;
+		}
+		port_priv->vlans[vid] |= ETHSW_VLAN_UNTAGGED;
+	}
+
+	if (flags & BRIDGE_VLAN_INFO_PVID) {
+		err = dpaa2_switch_port_set_pvid(port_priv, vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
+{
+	struct dpsw_stp_cfg stp_cfg = {
+		.state = state,
+	};
+	int err;
+	u16 vid;
+
+	if (!netif_running(port_priv->netdev) || state == port_priv->stp_state)
+		return 0;	/* Nothing to do */
+
+	for (vid = 0; vid <= VLAN_VID_MASK; vid++) {
+		if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
+			stp_cfg.vlan_id = vid;
+			err = dpsw_if_set_stp(port_priv->ethsw_data->mc_io, 0,
+					      port_priv->ethsw_data->dpsw_handle,
+					      port_priv->idx, &stp_cfg);
+			if (err) {
+				netdev_err(port_priv->netdev,
+					   "dpsw_if_set_stp err %d\n", err);
+				return err;
+			}
+		}
+	}
+
+	port_priv->stp_state = state;
+
+	return 0;
+}
+
+static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid)
+{
+	struct ethsw_port_priv *ppriv_local = NULL;
+	int i, err;
+
+	if (!ethsw->vlans[vid])
+		return -ENOENT;
+
+	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, vid);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_vlan_remove err %d\n", err);
+		return err;
+	}
+	ethsw->vlans[vid] = 0;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		ppriv_local = ethsw->ports[i];
+		ppriv_local->vlans[vid] = 0;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	entry.if_egress = port_priv->idx;
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	ether_addr_copy(entry.mac_addr, addr);
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0,
+				   port_priv->ethsw_data->dpsw_handle,
+				   fdb_id, &entry);
+	if (err)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_add_unicast err %d\n", err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	entry.if_egress = port_priv->idx;
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	ether_addr_copy(entry.mac_addr, addr);
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0,
+				      port_priv->ethsw_data->dpsw_handle,
+				      fdb_id, &entry);
+	/* Silently discard error for calling multiple times the del command */
+	if (err && err != -ENXIO)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_remove_unicast err %d\n", err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	ether_addr_copy(entry.mac_addr, addr);
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	entry.num_ifs = 1;
+	entry.if_id[0] = port_priv->idx;
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     fdb_id, &entry);
+	/* Silently discard error for calling multiple times the add command */
+	if (err && err != -ENXIO)
+		netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n",
+			   err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	ether_addr_copy(entry.mac_addr, addr);
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	entry.num_ifs = 1;
+	entry.if_id[0] = port_priv->idx;
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0,
+					port_priv->ethsw_data->dpsw_handle,
+					fdb_id, &entry);
+	/* Silently discard error for calling multiple times the del command */
+	if (err && err != -ENAVAIL)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_remove_multicast err %d\n", err);
+	return err;
+}
+
+static void dpaa2_switch_port_get_stats(struct net_device *netdev,
+					struct rtnl_link_stats64 *stats)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	u64 tmp;
+	int err;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FRAME, &stats->rx_packets);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_FRAME, &stats->tx_packets);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_BYTE, &stats->rx_bytes);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_BYTE, &stats->tx_bytes);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FRAME_DISCARD,
+				  &stats->rx_dropped);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FLTR_FRAME,
+				  &tmp);
+	if (err)
+		goto error;
+	stats->rx_dropped += tmp;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_FRAME_DISCARD,
+				  &stats->tx_dropped);
+	if (err)
+		goto error;
+
+	return;
+
+error:
+	netdev_err(netdev, "dpsw_if_get_counter err %d\n", err);
+}
+
+static bool dpaa2_switch_port_has_offload_stats(const struct net_device *netdev,
+						int attr_id)
+{
+	return (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT);
+}
+
+static int dpaa2_switch_port_get_offload_stats(int attr_id,
+					       const struct net_device *netdev,
+					       void *sp)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		dpaa2_switch_port_get_stats((struct net_device *)netdev, sp);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = dpsw_if_set_max_frame_length(port_priv->ethsw_data->mc_io,
+					   0,
+					   port_priv->ethsw_data->dpsw_handle,
+					   port_priv->idx,
+					   (u16)ETHSW_L2_MAX_FRM(mtu));
+	if (err) {
+		netdev_err(netdev,
+			   "dpsw_if_set_max_frame_length() err %d\n", err);
+		return err;
+	}
+
+	netdev->mtu = mtu;
+	return 0;
+}
+
+static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpsw_link_state state;
+	int err;
+
+	/* Interrupts are received even though no one issued an 'ifconfig up'
+	 * on the switch interface. Ignore these link state update interrupts
+	 */
+	if (!netif_running(netdev))
+		return 0;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx, &state);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
+		return err;
+	}
+
+	WARN_ONCE(state.up > 1, "Garbage read into link_state");
+
+	if (state.up != port_priv->link_state) {
+		if (state.up) {
+			netif_carrier_on(netdev);
+			netif_tx_start_all_queues(netdev);
+		} else {
+			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
+		}
+		port_priv->link_state = state.up;
+	}
+
+	return 0;
+}
+
+/* Manage all NAPI instances for the control interface.
+ *
+ * We only have one RX queue and one Tx Conf queue for all
+ * switch ports. Therefore, we only need to enable the NAPI instance once, the
+ * first time one of the switch ports runs .dev_open().
+ */
+
+static void dpaa2_switch_enable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* a new interface is using the NAPI instance */
+	ethsw->napi_users++;
+
+	/* if there is already a user of the instance, return */
+	if (ethsw->napi_users > 1)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_enable(&ethsw->fq[i].napi);
+}
+
+static void dpaa2_switch_disable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* If we are not the last interface using the NAPI, return */
+	ethsw->napi_users--;
+	if (ethsw->napi_users)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_disable(&ethsw->fq[i].napi);
+}
+
+static int dpaa2_switch_port_open(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* Explicitly set carrier off, otherwise
+	 * netif_carrier_ok() will return true and cause 'ip link show'
+	 * to report the LOWER_UP flag, even though the link
+	 * notification wasn't even received.
+	 */
+	netif_carrier_off(netdev);
+
+	err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
+			     port_priv->ethsw_data->dpsw_handle,
+			     port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_enable err %d\n", err);
+		return err;
+	}
+
+	/* sync carrier state */
+	err = dpaa2_switch_port_carrier_state_sync(netdev);
+	if (err) {
+		netdev_err(netdev,
+			   "dpaa2_switch_port_carrier_state_sync err %d\n", err);
+		goto err_carrier_sync;
+	}
+
+	dpaa2_switch_enable_ctrl_if_napi(ethsw);
+
+	return 0;
+
+err_carrier_sync:
+	dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			port_priv->ethsw_data->dpsw_handle,
+			port_priv->idx);
+	return err;
+}
+
+static int dpaa2_switch_port_stop(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			      port_priv->ethsw_data->dpsw_handle,
+			      port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+		return err;
+	}
+
+	dpaa2_switch_disable_ctrl_if_napi(ethsw);
+
+	return 0;
+}
+
+static int dpaa2_switch_port_parent_id(struct net_device *dev,
+				       struct netdev_phys_item_id *ppid)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(dev);
+
+	ppid->id_len = 1;
+	ppid->id[0] = port_priv->ethsw_data->dev_id;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_get_phys_name(struct net_device *netdev, char *name,
+					   size_t len)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = snprintf(name, len, "p%d", port_priv->idx);
+	if (err >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
+struct ethsw_dump_ctx {
+	struct net_device *dev;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx;
+};
+
+static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
+				    struct ethsw_dump_ctx *dump)
+{
+	int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
+	u32 portid = NETLINK_CB(dump->cb->skb).portid;
+	u32 seq = dump->cb->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	if (dump->idx < dump->cb->args[2])
+		goto skip;
+
+	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+			sizeof(*ndm), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family  = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags   = NTF_SELF;
+	ndm->ndm_type    = 0;
+	ndm->ndm_ifindex = dump->dev->ifindex;
+	ndm->ndm_state   = is_dynamic ? NUD_REACHABLE : NUD_NOARP;
+
+	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac_addr))
+		goto nla_put_failure;
+
+	nlmsg_end(dump->skb, nlh);
+
+skip:
+	dump->idx++;
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(dump->skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
+					     struct ethsw_port_priv *port_priv)
+{
+	int idx = port_priv->idx;
+	int valid;
+
+	if (entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		valid = entry->if_info == port_priv->idx;
+	else
+		valid = entry->if_mask[idx / 8] & BIT(idx % 8);
+
+	return valid;
+}
+
+static int dpaa2_switch_fdb_iterate(struct ethsw_port_priv *port_priv,
+				    dpaa2_switch_fdb_cb_t cb, void *data)
+{
+	struct net_device *net_dev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct device *dev = net_dev->dev.parent;
+	struct fdb_dump_entry *fdb_entries;
+	struct fdb_dump_entry fdb_entry;
+	dma_addr_t fdb_dump_iova;
+	u16 num_fdb_entries;
+	u32 fdb_dump_size;
+	int err = 0, i;
+	u8 *dma_mem;
+	u16 fdb_id;
+
+	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
+	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
+	if (!dma_mem)
+		return -ENOMEM;
+
+	fdb_dump_iova = dma_map_single(dev, dma_mem, fdb_dump_size,
+				       DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, fdb_dump_iova)) {
+		netdev_err(net_dev, "dma_map_single() failed\n");
+		err = -ENOMEM;
+		goto err_map;
+	}
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, fdb_id,
+			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
+	if (err) {
+		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
+		goto err_dump;
+	}
+
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
+
+	fdb_entries = (struct fdb_dump_entry *)dma_mem;
+	for (i = 0; i < num_fdb_entries; i++) {
+		fdb_entry = fdb_entries[i];
+
+		err = cb(port_priv, &fdb_entry, data);
+		if (err)
+			goto end;
+	}
+
+end:
+	kfree(dma_mem);
+
+	return 0;
+
+err_dump:
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_TO_DEVICE);
+err_map:
+	kfree(dma_mem);
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_dump(struct ethsw_port_priv *port_priv,
+				       struct fdb_dump_entry *fdb_entry,
+				       void *data)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	return dpaa2_switch_fdb_dump_nl(fdb_entry, data);
+}
+
+static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+				      struct net_device *net_dev,
+				      struct net_device *filter_dev, int *idx)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_dump_ctx dump = {
+		.dev = net_dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	int err;
+
+	err = dpaa2_switch_fdb_iterate(port_priv, dpaa2_switch_fdb_entry_dump, &dump);
+	*idx = dump.idx;
+
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_fast_age(struct ethsw_port_priv *port_priv,
+					   struct fdb_dump_entry *fdb_entry,
+					   void *data __always_unused)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	if (!(fdb_entry->type & DPSW_FDB_ENTRY_TYPE_DYNAMIC))
+		return 0;
+
+	if (fdb_entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		dpaa2_switch_port_fdb_del_uc(port_priv, fdb_entry->mac_addr);
+	else
+		dpaa2_switch_port_fdb_del_mc(port_priv, fdb_entry->mac_addr);
+
+	return 0;
+}
+
+static void dpaa2_switch_port_fast_age(struct ethsw_port_priv *port_priv)
+{
+	dpaa2_switch_fdb_iterate(port_priv,
+				 dpaa2_switch_fdb_entry_fast_age, NULL);
+}
+
+static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto,
+				      u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_add(netdev, &vlan);
+}
+
+static int dpaa2_switch_port_vlan_kill(struct net_device *netdev, __be16 proto,
+				       u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_del(netdev, &vlan);
+}
+
+static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *net_dev = port_priv->netdev;
+	struct device *dev = net_dev->dev.parent;
+	u8 mac_addr[ETH_ALEN];
+	int err;
+
+	if (!(ethsw->features & ETHSW_FEATURE_MAC_ADDR))
+		return 0;
+
+	/* Get firmware address, if any */
+	err = dpsw_if_get_port_mac_addr(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					port_priv->idx, mac_addr);
+	if (err) {
+		dev_err(dev, "dpsw_if_get_port_mac_addr() failed\n");
+		return err;
+	}
+
+	/* First check if firmware has any address configured by bootloader */
+	if (!is_zero_ether_addr(mac_addr)) {
+		memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
+	} else {
+		/* No MAC address configured, fill in net_dev->dev_addr
+		 * with a random one
+		 */
+		eth_hw_addr_random(net_dev);
+		dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n");
+
+		/* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all
+		 * practical purposes, this will be our "permanent" mac address,
+		 * at least until the next reboot. This move will also permit
+		 * register_netdevice() to properly fill up net_dev->perm_addr.
+		 */
+		net_dev->addr_assign_type = NET_ADDR_PERM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw,
+				 const struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	unsigned char *buffer_start;
+	struct sk_buff **skbh, *skb;
+	dma_addr_t fd_addr;
+
+	fd_addr = dpaa2_fd_get_addr(fd);
+	skbh = dpaa2_iova_to_virt(ethsw->iommu_domain, fd_addr);
+
+	skb = *skbh;
+	buffer_start = (unsigned char *)skbh;
+
+	dma_unmap_single(dev, fd_addr,
+			 skb_tail_pointer(skb) - buffer_start,
+			 DMA_TO_DEVICE);
+
+	/* Move on with skb release */
+	dev_kfree_skb(skb);
+}
+
+static int dpaa2_switch_build_single_fd(struct ethsw_core *ethsw,
+					struct sk_buff *skb,
+					struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	struct sk_buff **skbh;
+	dma_addr_t addr;
+	u8 *buff_start;
+	void *hwa;
+
+	buff_start = PTR_ALIGN(skb->data - DPAA2_SWITCH_TX_DATA_OFFSET -
+			       DPAA2_SWITCH_TX_BUF_ALIGN,
+			       DPAA2_SWITCH_TX_BUF_ALIGN);
+
+	/* Clear FAS to have consistent values for TX confirmation. It is
+	 * located in the first 8 bytes of the buffer's hardware annotation
+	 * area
+	 */
+	hwa = buff_start + DPAA2_SWITCH_SWA_SIZE;
+	memset(hwa, 0, 8);
+
+	/* Store a backpointer to the skb at the beginning of the buffer
+	 * (in the private data area) such that we can release it
+	 * on Tx confirm
+	 */
+	skbh = (struct sk_buff **)buff_start;
+	*skbh = skb;
+
+	addr = dma_map_single(dev, buff_start,
+			      skb_tail_pointer(skb) - buff_start,
+			      DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, addr)))
+		return -ENOMEM;
+
+	/* Setup the FD fields */
+	memset(fd, 0, sizeof(*fd));
+
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, (u16)(skb->data - buff_start));
+	dpaa2_fd_set_len(fd, skb->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+
+	return 0;
+}
+
+static netdev_tx_t dpaa2_switch_port_tx(struct sk_buff *skb,
+					struct net_device *net_dev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int retries = DPAA2_SWITCH_SWP_BUSY_RETRIES;
+	struct dpaa2_fd fd;
+	int err;
+
+	if (unlikely(skb_headroom(skb) < DPAA2_SWITCH_NEEDED_HEADROOM)) {
+		struct sk_buff *ns;
+
+		ns = skb_realloc_headroom(skb, DPAA2_SWITCH_NEEDED_HEADROOM);
+		if (unlikely(!ns)) {
+			net_err_ratelimited("%s: Error reallocating skb headroom\n", net_dev->name);
+			goto err_free_skb;
+		}
+		dev_consume_skb_any(skb);
+		skb = ns;
+	}
+
+	/* We'll be holding a back-reference to the skb until Tx confirmation */
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		/* skb_unshare() has already freed the skb */
+		net_err_ratelimited("%s: Error copying the socket buffer\n", net_dev->name);
+		goto err_exit;
+	}
+
+	/* At this stage, we do not support non-linear skbs so just try to
+	 * linearize the skb and if that's not working, just drop the packet.
+	 */
+	err = skb_linearize(skb);
+	if (err) {
+		net_err_ratelimited("%s: skb_linearize error (%d)!\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	err = dpaa2_switch_build_single_fd(ethsw, skb, &fd);
+	if (unlikely(err)) {
+		net_err_ratelimited("%s: ethsw_build_*_fd() %d\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	do {
+		err = dpaa2_io_service_enqueue_qd(NULL,
+						  port_priv->tx_qdid,
+						  8, 0, &fd);
+		retries--;
+	} while (err == -EBUSY && retries);
+
+	if (unlikely(err < 0)) {
+		dpaa2_switch_free_fd(ethsw, &fd);
+		goto err_exit;
+	}
+
+	return NETDEV_TX_OK;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+err_exit:
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops dpaa2_switch_port_ops = {
+	.ndo_open		= dpaa2_switch_port_open,
+	.ndo_stop		= dpaa2_switch_port_stop,
+
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_get_stats64	= dpaa2_switch_port_get_stats,
+	.ndo_change_mtu		= dpaa2_switch_port_change_mtu,
+	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
+	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
+	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
+	.ndo_vlan_rx_add_vid	= dpaa2_switch_port_vlan_add,
+	.ndo_vlan_rx_kill_vid	= dpaa2_switch_port_vlan_kill,
+
+	.ndo_start_xmit		= dpaa2_switch_port_tx,
+	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
+	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
+};
+
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
+{
+	return netdev->netdev_ops == &dpaa2_switch_port_ops;
+}
+
+static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
+		dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+	}
+}
+
+static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
+{
+	struct device *dev = (struct device *)arg;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+
+	/* Mask the events and the if_id reserved bits to be cleared on read */
+	u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
+	int err;
+
+	err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, &status);
+	if (err) {
+		dev_err(dev, "Can't get irq status (err %d)\n", err);
+
+		err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					    DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
+		if (err)
+			dev_err(dev, "Can't clear irq status (err %d)\n", err);
+		goto out;
+	}
+
+	if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
+		dpaa2_switch_links_state_update(ethsw);
+
+out:
+	return IRQ_HANDLED;
+}
+
+static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED;
+	struct fsl_mc_device_irq *irq;
+	int err;
+
+	err = fsl_mc_allocate_irqs(sw_dev);
+	if (err) {
+		dev_err(dev, "MC irqs allocation failed\n");
+		return err;
+	}
+
+	if (WARN_ON(sw_dev->obj_desc.irq_count != DPSW_IRQ_NUM)) {
+		err = -EINVAL;
+		goto free_irq;
+	}
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 0);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
+		goto free_irq;
+	}
+
+	irq = sw_dev->irqs[DPSW_IRQ_INDEX_IF];
+
+	err = devm_request_threaded_irq(dev, irq->msi_desc->irq,
+					NULL,
+					dpaa2_switch_irq0_handler_thread,
+					IRQF_NO_SUSPEND | IRQF_ONESHOT,
+					dev_name(dev), dev);
+	if (err) {
+		dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
+		goto free_irq;
+	}
+
+	err = dpsw_set_irq_mask(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				DPSW_IRQ_INDEX_IF, mask);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_mask(): %d\n", err);
+		goto free_devm_irq;
+	}
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 1);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_enable(): %d\n", err);
+		goto free_devm_irq;
+	}
+
+	return 0;
+
+free_devm_irq:
+	devm_free_irq(dev, irq->msi_desc->irq, dev);
+free_irq:
+	fsl_mc_free_irqs(sw_dev);
+	return err;
+}
+
+static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	int err;
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 0);
+	if (err)
+		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
+
+	fsl_mc_free_irqs(sw_dev);
+}
+
+static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
+						u8 state)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+
+	return dpaa2_switch_port_set_stp_state(port_priv, state);
+}
+
+static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, bool enable)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	enum dpsw_learning_mode learn_mode;
+	int err;
+
+	if (enable)
+		learn_mode = DPSW_LEARNING_MODE_HW;
+	else
+		learn_mode = DPSW_LEARNING_MODE_DIS;
+
+	err = dpsw_if_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					port_priv->idx, learn_mode);
+	if (err)
+		netdev_err(port_priv->netdev, "dpsw_if_set_learning_mode err %d\n", err);
+
+	if (!enable)
+		dpaa2_switch_port_fast_age(port_priv);
+
+	return err;
+}
+
+static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv,
+				   struct switchdev_brport_flags flags)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+
+	if (flags.mask & BR_BCAST_FLOOD)
+		port_priv->bcast_flood = !!(flags.val & BR_BCAST_FLOOD);
+
+	if (flags.mask & BR_FLOOD)
+		port_priv->ucast_flood = !!(flags.val & BR_FLOOD);
+
+	return dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+}
+
+static int dpaa2_switch_port_pre_bridge_flags(struct net_device *netdev,
+					      struct switchdev_brport_flags flags,
+					      struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_LEARNING | BR_BCAST_FLOOD | BR_FLOOD |
+			   BR_MCAST_FLOOD))
+		return -EINVAL;
+
+	if (flags.mask & (BR_FLOOD | BR_MCAST_FLOOD)) {
+		bool multicast = !!(flags.val & BR_MCAST_FLOOD);
+		bool unicast = !!(flags.val & BR_FLOOD);
+
+		if (unicast != multicast) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot configure multicast flooding independently of unicast");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
+					  struct switchdev_brport_flags flags,
+					  struct netlink_ext_ack *extack)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	if (flags.mask & BR_LEARNING) {
+		bool learn_ena = !!(flags.val & BR_LEARNING);
+
+		err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
+		if (err)
+			return err;
+	}
+
+	if (flags.mask & (BR_BCAST_FLOOD | BR_FLOOD | BR_MCAST_FLOOD)) {
+		err = dpaa2_switch_port_flood(port_priv, flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_attr_set(struct net_device *netdev,
+				      const struct switchdev_attr *attr,
+				      struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		err = dpaa2_switch_port_attr_stp_state_set(netdev,
+							   attr->u.stp_state);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		if (!attr->u.vlan_filtering) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "The DPAA2 switch does not support VLAN-unaware operation");
+			return -EOPNOTSUPP;
+		}
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_pre_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_attr *attr = &ethsw->sw_attr;
+	int err = 0;
+
+	/* Make sure that the VLAN is not already configured
+	 * on the switch port
+	 */
+	if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER)
+		return -EEXIST;
+
+	/* Check if there is space for a new VLAN */
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
+		return err;
+	}
+	if (attr->max_vlans - attr->num_vlans < 1)
+		return -ENOSPC;
+
+	/* Check if there is space for a new VLAN */
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
+		return err;
+	}
+	if (attr->max_vlans - attr->num_vlans < 1)
+		return -ENOSPC;
+
+	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
+		/* this is a new VLAN */
+		err = dpaa2_switch_add_vlan(port_priv, vlan->vid);
+		if (err)
+			return err;
+
+		port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL;
+	}
+
+	return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags);
+}
+
+static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc,
+					    const unsigned char *addr)
+{
+	struct netdev_hw_addr_list *list = (is_uc) ? &netdev->uc : &netdev->mc;
+	struct netdev_hw_addr *ha;
+
+	netif_addr_lock_bh(netdev);
+	list_for_each_entry(ha, &list->list, list) {
+		if (ether_addr_equal(ha->addr, addr)) {
+			netif_addr_unlock_bh(netdev);
+			return 1;
+		}
+	}
+	netif_addr_unlock_bh(netdev);
+	return 0;
+}
+
+static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
+				     const struct switchdev_obj_port_mdb *mdb)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	/* Check if address is already set on this port */
+	if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
+		return -EEXIST;
+
+	err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
+	if (err)
+		return err;
+
+	err = dev_mc_add(netdev, mdb->addr);
+	if (err) {
+		netdev_err(netdev, "dev_mc_add err %d\n", err);
+		dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_obj_add(struct net_device *netdev,
+				     const struct switchdev_obj *obj)
+{
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dpaa2_switch_port_vlans_add(netdev,
+						  SWITCHDEV_OBJ_PORT_VLAN(obj));
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dpaa2_switch_port_mdb_add(netdev,
+						SWITCHDEV_OBJ_PORT_MDB(obj));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_vlan_if_cfg vcfg;
+	int i, err;
+
+	if (!port_priv->vlans[vid])
+		return -ENOENT;
+
+	if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) {
+		/* If we are deleting the PVID of a port, use VLAN 4095 instead
+		 * as we are sure that neither the bridge nor the 8021q module
+		 * will use it
+		 */
+		err = dpaa2_switch_port_set_pvid(port_priv, 4095);
+		if (err)
+			return err;
+	}
+
+	vcfg.num_ifs = 1;
+	vcfg.if_id[0] = port_priv->idx;
+	if (port_priv->vlans[vid] & ETHSW_VLAN_UNTAGGED) {
+		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0,
+						   ethsw->dpsw_handle,
+						   vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_remove_if_untagged err %d\n",
+				   err);
+		}
+		port_priv->vlans[vid] &= ~ETHSW_VLAN_UNTAGGED;
+	}
+
+	if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
+		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_remove_if err %d\n", err);
+			return err;
+		}
+		port_priv->vlans[vid] &= ~ETHSW_VLAN_MEMBER;
+
+		/* Delete VLAN from switch if it is no longer configured on
+		 * any port
+		 */
+		for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+			if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
+				return 0; /* Found a port member in VID */
+
+		ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL;
+
+		err = dpaa2_switch_dellink(ethsw, vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+
+	if (netif_is_bridge_master(vlan->obj.orig_dev))
+		return -EOPNOTSUPP;
+
+	return dpaa2_switch_port_del_vlan(port_priv, vlan->vid);
+}
+
+static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
+				     const struct switchdev_obj_port_mdb *mdb)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
+		return -ENOENT;
+
+	err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+	if (err)
+		return err;
+
+	err = dev_mc_del(netdev, mdb->addr);
+	if (err) {
+		netdev_err(netdev, "dev_mc_del err %d\n", err);
+		return err;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_obj_del(struct net_device *netdev,
+				     const struct switchdev_obj *obj)
+{
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dpaa2_switch_port_mdb_del(netdev, SWITCHDEV_OBJ_PORT_MDB(obj));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+	return err;
+}
+
+static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
+					    struct switchdev_notifier_port_attr_info *ptr)
+{
+	int err;
+
+	err = switchdev_handle_port_attr_set(netdev, ptr,
+					     dpaa2_switch_port_dev_check,
+					     dpaa2_switch_port_attr_set);
+	return notifier_from_errno(err);
+}
+
+static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
+					 struct net_device *upper_dev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct ethsw_port_priv *other_port_priv;
+	struct net_device *other_dev;
+	struct list_head *iter;
+	bool learn_ena;
+	int err;
+
+	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev))
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
+			netdev_err(netdev,
+				   "Interface from a different DPSW is in the bridge already!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Delete the previously manually installed VLAN 1 */
+	err = dpaa2_switch_port_del_vlan(port_priv, 1);
+	if (err)
+		return err;
+
+	dpaa2_switch_port_set_fdb(port_priv, upper_dev);
+
+	/* Inherit the initial bridge port learning state */
+	learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
+	err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
+
+	/* Setup the egress flood policy (broadcast, unknown unicast) */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		goto err_egress_flood;
+
+	return 0;
+
+err_egress_flood:
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
+	return err;
+}
+
+static int dpaa2_switch_port_clear_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_kill(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* First of all, fast age any learn FDB addresses on this switch port */
+	dpaa2_switch_port_fast_age(port_priv);
+
+	/* Clear all RX VLANs installed through vlan_vid_add() either as VLAN
+	 * upper devices or otherwise from the FDB table that we are about to
+	 * leave
+	 */
+	err = vlan_for_each(netdev, dpaa2_switch_port_clear_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err);
+
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
+
+	/* Restore all RX VLANs into the new FDB table that we just joined */
+	err = vlan_for_each(netdev, dpaa2_switch_port_restore_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err);
+
+	/* Reset the flooding state to denote that this port can send any
+	 * packet in standalone mode. With this, we are also ensuring that any
+	 * later bridge join will have the flooding flag on.
+	 */
+	port_priv->bcast_flood = true;
+	port_priv->ucast_flood = true;
+
+	/* Setup the egress flood policy (broadcast, unknown unicast).
+	 * When the port is not under a bridge, only the CTRL interface is part
+	 * of the flooding domain besides the actual port
+	 */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Recreate the egress flood domain of the FDB that we just left */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* No HW learning when not under a bridge */
+	err = dpaa2_switch_port_set_learning(port_priv, false);
+	if (err)
+		return err;
+
+	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
+	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
+	 */
+	return dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+					  BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
+}
+
+static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev)
+{
+	struct net_device *upper_dev;
+	struct list_head *iter;
+
+	/* RCU read lock not necessary because we have write-side protection
+	 * (rtnl_mutex), however a non-rcu iterator does not exist.
+	 */
+	netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter)
+		if (is_vlan_dev(upper_dev))
+			return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
+					     unsigned long event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper_dev;
+	int err = 0;
+
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (!netif_is_bridge_master(upper_dev))
+			break;
+
+		if (!br_vlan_enabled(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot join a bridge while VLAN uppers are present");
+			goto out;
+		}
+
+		break;
+	case NETDEV_CHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (netif_is_bridge_master(upper_dev)) {
+			if (info->linking)
+				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
+			else
+				err = dpaa2_switch_port_bridge_leave(netdev);
+		}
+		break;
+	}
+
+out:
+	return notifier_from_errno(err);
+}
+
+struct ethsw_switchdev_event_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+static void dpaa2_switch_event_work(struct work_struct *work)
+{
+	struct ethsw_switchdev_event_work *switchdev_work =
+		container_of(work, struct ethsw_switchdev_event_work, work);
+	struct net_device *dev = switchdev_work->dev;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	int err;
+
+	rtnl_lock();
+	fdb_info = &switchdev_work->fdb_info;
+
+	switch (switchdev_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		if (is_unicast_ether_addr(fdb_info->addr))
+			err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
+							   fdb_info->addr);
+		else
+			err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
+							   fdb_info->addr);
+		if (err)
+			break;
+		fdb_info->offloaded = true;
+		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+					 &fdb_info->info, NULL);
+		break;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		if (is_unicast_ether_addr(fdb_info->addr))
+			dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
+		else
+			dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
+		break;
+	}
+
+	rtnl_unlock();
+	kfree(switchdev_work->fdb_info.addr);
+	kfree(switchdev_work);
+	dev_put(dev);
+}
+
+/* Called under rcu_read_lock() */
+static int dpaa2_switch_port_event(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct ethsw_port_priv *port_priv = netdev_priv(dev);
+	struct ethsw_switchdev_event_work *switchdev_work;
+	struct switchdev_notifier_fdb_info *fdb_info = ptr;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+
+	if (event == SWITCHDEV_PORT_ATTR_SET)
+		return dpaa2_switch_port_attr_set_event(dev, ptr);
+
+	if (!dpaa2_switch_port_dev_check(dev))
+		return NOTIFY_DONE;
+
+	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+	if (!switchdev_work)
+		return NOTIFY_BAD;
+
+	INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
+	switchdev_work->dev = dev;
+	switchdev_work->event = event;
+
+	switch (event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		memcpy(&switchdev_work->fdb_info, ptr,
+		       sizeof(switchdev_work->fdb_info));
+		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (!switchdev_work->fdb_info.addr)
+			goto err_addr_alloc;
+
+		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+				fdb_info->addr);
+
+		/* Take a reference on the device to avoid being freed. */
+		dev_hold(dev);
+		break;
+	default:
+		kfree(switchdev_work);
+		return NOTIFY_DONE;
+	}
+
+	queue_work(ethsw->workqueue, &switchdev_work->work);
+
+	return NOTIFY_DONE;
+
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
+}
+
+static int dpaa2_switch_port_obj_event(unsigned long event,
+				       struct net_device *netdev,
+				       struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+	int err = -EOPNOTSUPP;
+
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
+		break;
+	}
+
+	port_obj_info->handled = true;
+	return notifier_from_errno(err);
+}
+
+static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
+					    unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+	case SWITCHDEV_PORT_OBJ_DEL:
+		return dpaa2_switch_port_obj_event(event, dev, ptr);
+	case SWITCHDEV_PORT_ATTR_SET:
+		return dpaa2_switch_port_attr_set_event(dev, ptr);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Build a linear skb based on a single-buffer frame descriptor */
+static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
+						     const struct dpaa2_fd *fd)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	struct device *dev = ethsw->dev;
+	struct sk_buff *skb = NULL;
+	void *fd_vaddr;
+
+	fd_vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
+	dma_unmap_page(dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
+		       DMA_FROM_DEVICE);
+
+	skb = build_skb(fd_vaddr, DPAA2_SWITCH_RX_BUF_SIZE +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (unlikely(!skb)) {
+		dev_err(dev, "build_skb() failed\n");
+		return NULL;
+	}
+
+	skb_reserve(skb, fd_offset);
+	skb_put(skb, fd_length);
+
+	ethsw->buf_count--;
+
+	return skb;
+}
+
+static void dpaa2_switch_tx_conf(struct dpaa2_switch_fq *fq,
+				 const struct dpaa2_fd *fd)
+{
+	dpaa2_switch_free_fd(fq->ethsw, fd);
+}
+
+static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
+			    const struct dpaa2_fd *fd)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	struct ethsw_port_priv *port_priv;
+	struct net_device *netdev;
+	struct vlan_ethhdr *hdr;
+	struct sk_buff *skb;
+	u16 vlan_tci, vid;
+	int if_id, err;
+
+	/* get switch ingress interface ID */
+	if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+
+	if (if_id >= ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "Frame received from unknown interface!\n");
+		goto err_free_fd;
+	}
+	port_priv = ethsw->ports[if_id];
+	netdev = port_priv->netdev;
+
+	/* build the SKB based on the FD received */
+	if (dpaa2_fd_get_format(fd) != dpaa2_fd_single) {
+		if (net_ratelimit()) {
+			netdev_err(netdev, "Received invalid frame format\n");
+			goto err_free_fd;
+		}
+	}
+
+	skb = dpaa2_switch_build_linear_skb(ethsw, fd);
+	if (unlikely(!skb))
+		goto err_free_fd;
+
+	skb_reset_mac_header(skb);
+
+	/* Remove the VLAN header if the packet that we just received has a vid
+	 * equal to the port PVIDs. Since the dpaa2-switch can operate only in
+	 * VLAN-aware mode and no alterations are made on the packet when it's
+	 * redirected/mirrored to the control interface, we are sure that there
+	 * will always be a VLAN header present.
+	 */
+	hdr = vlan_eth_hdr(skb);
+	vid = ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK;
+	if (vid == port_priv->pvid) {
+		err = __skb_vlan_pop(skb, &vlan_tci);
+		if (err) {
+			dev_info(ethsw->dev, "__skb_vlan_pop() returned %d", err);
+			goto err_free_fd;
+		}
+	}
+
+	skb->dev = netdev;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	/* Setup the offload_fwd_mark only if the port is under a bridge */
+	skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev);
+
+	netif_receive_skb(skb);
+
+	return;
+
+err_free_fd:
+	dpaa2_switch_free_fd(ethsw, fd);
+}
+
+static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
+{
+	ethsw->features = 0;
+
+	if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
+		ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
+}
+
+static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_attr ctrl_if_attr;
+	struct device *dev = ethsw->dev;
+	int i = 0;
+	int err;
+
+	err = dpsw_ctrl_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  &ctrl_if_attr);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_get_attributes() = %d\n", err);
+		return err;
+	}
+
+	ethsw->fq[i].fqid = ctrl_if_attr.rx_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_RX;
+
+	ethsw->fq[i].fqid = ctrl_if_attr.tx_err_conf_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_TX_ERR_CONF;
+
+	return 0;
+}
+
+/* Free buffers acquired from the buffer pool or which were meant to
+ * be released in the pool
+ */
+static void dpaa2_switch_free_bufs(struct ethsw_core *ethsw, u64 *buf_array, int count)
+{
+	struct device *dev = ethsw->dev;
+	void *vaddr;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, buf_array[i]);
+		dma_unmap_page(dev, buf_array[i], DPAA2_SWITCH_RX_BUF_SIZE,
+			       DMA_FROM_DEVICE);
+		free_pages((unsigned long)vaddr, 0);
+	}
+}
+
+/* Perform a single release command to add buffers
+ * to the specified buffer pool
+ */
+static int dpaa2_switch_add_bufs(struct ethsw_core *ethsw, u16 bpid)
+{
+	struct device *dev = ethsw->dev;
+	u64 buf_array[BUFS_PER_CMD];
+	struct page *page;
+	int retries = 0;
+	dma_addr_t addr;
+	int err;
+	int i;
+
+	for (i = 0; i < BUFS_PER_CMD; i++) {
+		/* Allocate one page for each Rx buffer. WRIOP sees
+		 * the entire page except for a tailroom reserved for
+		 * skb shared info
+		 */
+		page = dev_alloc_pages(0);
+		if (!page) {
+			dev_err(dev, "buffer allocation failed\n");
+			goto err_alloc;
+		}
+
+		addr = dma_map_page(dev, page, 0, DPAA2_SWITCH_RX_BUF_SIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, addr)) {
+			dev_err(dev, "dma_map_single() failed\n");
+			goto err_map;
+		}
+		buf_array[i] = addr;
+	}
+
+release_bufs:
+	/* In case the portal is busy, retry until successful or
+	 * max retries hit.
+	 */
+	while ((err = dpaa2_io_service_release(NULL, bpid,
+					       buf_array, i)) == -EBUSY) {
+		if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES)
+			break;
+
+		cpu_relax();
+	}
+
+	/* If release command failed, clean up and bail out. */
+	if (err) {
+		dpaa2_switch_free_bufs(ethsw, buf_array, i);
+		return 0;
+	}
+
+	return i;
+
+err_map:
+	__free_pages(page, 0);
+err_alloc:
+	/* If we managed to allocate at least some buffers,
+	 * release them to hardware
+	 */
+	if (i)
+		goto release_bufs;
+
+	return 0;
+}
+
+static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw)
+{
+	int *count = &ethsw->buf_count;
+	int new_count;
+	int err = 0;
+
+	if (unlikely(*count < DPAA2_ETHSW_REFILL_THRESH)) {
+		do {
+			new_count = dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+			if (unlikely(!new_count)) {
+				/* Out of memory; abort for now, we'll
+				 * try later on
+				 */
+				break;
+			}
+			*count += new_count;
+		} while (*count < DPAA2_ETHSW_NUM_BUFS);
+
+		if (unlikely(*count < DPAA2_ETHSW_NUM_BUFS))
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw)
+{
+	int *count, i;
+
+	for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) {
+		count = &ethsw->buf_count;
+		*count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+
+		if (unlikely(*count < BUFS_PER_CMD))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_drain_bp(struct ethsw_core *ethsw)
+{
+	u64 buf_array[BUFS_PER_CMD];
+	int ret;
+
+	do {
+		ret = dpaa2_io_service_acquire(NULL, ethsw->bpid,
+					       buf_array, BUFS_PER_CMD);
+		if (ret < 0) {
+			dev_err(ethsw->dev,
+				"dpaa2_io_service_acquire() = %d\n", ret);
+			return;
+		}
+		dpaa2_switch_free_bufs(ethsw, buf_array, ret);
+
+	} while (ret);
+}
+
+static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
+	struct device *dev = ethsw->dev;
+	struct fsl_mc_device *dpbp_dev;
+	struct dpbp_attr dpbp_attrs;
+	int err;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
+				     &dpbp_dev);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "DPBP device allocation failed\n");
+		return err;
+	}
+	ethsw->dpbp_dev = dpbp_dev;
+
+	err = dpbp_open(ethsw->mc_io, 0, dpbp_dev->obj_desc.id,
+			&dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpbp_reset(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_reset() failed\n");
+		goto err_reset;
+	}
+
+	err = dpbp_enable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_enable() failed\n");
+		goto err_enable;
+	}
+
+	err = dpbp_get_attributes(ethsw->mc_io, 0, dpbp_dev->mc_handle,
+				  &dpbp_attrs);
+	if (err) {
+		dev_err(dev, "dpbp_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+
+	dpsw_ctrl_if_pools_cfg.num_dpbp = 1;
+	dpsw_ctrl_if_pools_cfg.pools[0].dpbp_id = dpbp_attrs.id;
+	dpsw_ctrl_if_pools_cfg.pools[0].buffer_size = DPAA2_SWITCH_RX_BUF_SIZE;
+	dpsw_ctrl_if_pools_cfg.pools[0].backup_pool = 0;
+
+	err = dpsw_ctrl_if_set_pools(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     &dpsw_ctrl_if_pools_cfg);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
+		goto err_get_attr;
+	}
+	ethsw->bpid = dpbp_attrs.id;
+
+	return 0;
+
+err_get_attr:
+	dpbp_disable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_enable:
+err_reset:
+	dpbp_close(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_open:
+	fsl_mc_object_free(dpbp_dev);
+	return err;
+}
+
+static void dpaa2_switch_free_dpbp(struct ethsw_core *ethsw)
+{
+	dpbp_disable(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	dpbp_close(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	fsl_mc_object_free(ethsw->dpbp_dev);
+}
+
+static int dpaa2_switch_alloc_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		ethsw->fq[i].store =
+			dpaa2_io_store_create(DPAA2_SWITCH_STORE_SIZE,
+					      ethsw->dev);
+		if (!ethsw->fq[i].store) {
+			dev_err(ethsw->dev, "dpaa2_io_store_create failed\n");
+			while (--i >= 0)
+				dpaa2_io_store_destroy(ethsw->fq[i].store);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_store_destroy(ethsw->fq[i].store);
+}
+
+static int dpaa2_switch_pull_fq(struct dpaa2_switch_fq *fq)
+{
+	int err, retries = 0;
+
+	/* Try to pull from the FQ while the portal is busy and we didn't hit
+	 * the maximum number fo retries
+	 */
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, fq->fqid, fq->store);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	if (unlikely(err))
+		dev_err(fq->ethsw->dev, "dpaa2_io_service_pull err %d", err);
+
+	return err;
+}
+
+/* Consume all frames pull-dequeued into the store */
+static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	int cleaned = 0, is_last;
+	struct dpaa2_dq *dq;
+	int retries = 0;
+
+	do {
+		/* Get the next available FD from the store */
+		dq = dpaa2_io_store_next(fq->store, &is_last);
+		if (unlikely(!dq)) {
+			if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) {
+				dev_err_once(ethsw->dev,
+					     "No valid dequeue response\n");
+				return -ETIMEDOUT;
+			}
+			continue;
+		}
+
+		if (fq->type == DPSW_QUEUE_RX)
+			dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		else
+			dpaa2_switch_tx_conf(fq, dpaa2_dq_fd(dq));
+		cleaned++;
+
+	} while (!is_last);
+
+	return cleaned;
+}
+
+/* NAPI poll routine */
+static int dpaa2_switch_poll(struct napi_struct *napi, int budget)
+{
+	int err, cleaned = 0, store_cleaned, work_done;
+	struct dpaa2_switch_fq *fq;
+	int retries = 0;
+
+	fq = container_of(napi, struct dpaa2_switch_fq, napi);
+
+	do {
+		err = dpaa2_switch_pull_fq(fq);
+		if (unlikely(err))
+			break;
+
+		/* Refill pool if appropriate */
+		dpaa2_switch_refill_bp(fq->ethsw);
+
+		store_cleaned = dpaa2_switch_store_consume(fq);
+		cleaned += store_cleaned;
+
+		if (cleaned >= budget) {
+			work_done = budget;
+			goto out;
+		}
+
+	} while (store_cleaned);
+
+	/* We didn't consume the entire budget, so finish napi and re-enable
+	 * data availability notifications
+	 */
+	napi_complete_done(napi, cleaned);
+	do {
+		err = dpaa2_io_service_rearm(NULL, &fq->nctx);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	work_done = max(cleaned, 1);
+out:
+
+	return work_done;
+}
+
+static void dpaa2_switch_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
+{
+	struct dpaa2_switch_fq *fq;
+
+	fq = container_of(nctx, struct dpaa2_switch_fq, nctx);
+
+	napi_schedule(&fq->napi);
+}
+
+static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_queue_cfg queue_cfg;
+	struct dpaa2_io_notification_ctx *nctx;
+	int err, i, j;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		nctx = &ethsw->fq[i].nctx;
+
+		/* Register a new software context for the FQID.
+		 * By using NULL as the first parameter, we specify that we do
+		 * not care on which cpu are interrupts received for this queue
+		 */
+		nctx->is_cdan = 0;
+		nctx->id = ethsw->fq[i].fqid;
+		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
+		nctx->cb = dpaa2_switch_fqdan_cb;
+		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
+		if (err) {
+			err = -EPROBE_DEFER;
+			goto err_register;
+		}
+
+		queue_cfg.options = DPSW_CTRL_IF_QUEUE_OPT_DEST |
+				    DPSW_CTRL_IF_QUEUE_OPT_USER_CTX;
+		queue_cfg.dest_cfg.dest_type = DPSW_CTRL_IF_DEST_DPIO;
+		queue_cfg.dest_cfg.dest_id = nctx->dpio_id;
+		queue_cfg.dest_cfg.priority = 0;
+		queue_cfg.user_ctx = nctx->qman64;
+
+		err = dpsw_ctrl_if_set_queue(ethsw->mc_io, 0,
+					     ethsw->dpsw_handle,
+					     ethsw->fq[i].type,
+					     &queue_cfg);
+		if (err)
+			goto err_set_queue;
+	}
+
+	return 0;
+
+err_set_queue:
+	dpaa2_io_service_deregister(NULL, nctx, ethsw->dev);
+err_register:
+	for (j = 0; j < i; j++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[j].nctx,
+					    ethsw->dev);
+
+	return err;
+}
+
+static void dpaa2_switch_free_dpio(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[i].nctx,
+					    ethsw->dev);
+}
+
+static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
+{
+	int err;
+
+	/* setup FQs for Rx and Tx Conf */
+	err = dpaa2_switch_setup_fqs(ethsw);
+	if (err)
+		return err;
+
+	/* setup the buffer pool needed on the Rx path */
+	err = dpaa2_switch_setup_dpbp(ethsw);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_seed_bp(ethsw);
+	if (err)
+		goto err_free_dpbp;
+
+	err = dpaa2_switch_alloc_rings(ethsw);
+	if (err)
+		goto err_drain_dpbp;
+
+	err = dpaa2_switch_setup_dpio(ethsw);
+	if (err)
+		goto err_destroy_rings;
+
+	err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
+		goto err_deregister_dpio;
+	}
+
+	return 0;
+
+err_deregister_dpio:
+	dpaa2_switch_free_dpio(ethsw);
+err_destroy_rings:
+	dpaa2_switch_destroy_rings(ethsw);
+err_drain_dpbp:
+	dpaa2_switch_drain_bp(ethsw);
+err_free_dpbp:
+	dpaa2_switch_free_dpbp(ethsw);
+
+	return err;
+}
+
+static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	struct dpsw_tci_cfg tci_cfg = {0};
+	struct dpsw_stp_cfg stp_cfg;
+	int err;
+	u16 i;
+
+	ethsw->dev_id = sw_dev->obj_desc.id;
+
+	err = dpsw_open(ethsw->mc_io, 0, ethsw->dev_id, &ethsw->dpsw_handle);
+	if (err) {
+		dev_err(dev, "dpsw_open err %d\n", err);
+		return err;
+	}
+
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		dev_err(dev, "dpsw_get_attributes err %d\n", err);
+		goto err_close;
+	}
+
+	err = dpsw_get_api_version(ethsw->mc_io, 0,
+				   &ethsw->major,
+				   &ethsw->minor);
+	if (err) {
+		dev_err(dev, "dpsw_get_api_version err %d\n", err);
+		goto err_close;
+	}
+
+	/* Minimum supported DPSW version check */
+	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
+	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
+	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
+		dev_err(dev, "DPSW version %d:%d not supported. Use firmware 10.28.0 or greater.\n",
+			ethsw->major, ethsw->minor);
+		err = -EOPNOTSUPP;
+		goto err_close;
+	}
+
+	if (!dpaa2_switch_supports_cpu_traffic(ethsw)) {
+		err = -EOPNOTSUPP;
+		goto err_close;
+	}
+
+	dpaa2_switch_detect_features(ethsw);
+
+	err = dpsw_reset(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(dev, "dpsw_reset err %d\n", err);
+		goto err_close;
+	}
+
+	stp_cfg.vlan_id = DEFAULT_VLAN_ID;
+	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
+		if (err) {
+			dev_err(dev, "dpsw_if_disable err %d\n", err);
+			goto err_close;
+		}
+
+		err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
+				      &stp_cfg);
+		if (err) {
+			dev_err(dev, "dpsw_if_set_stp err %d for port %d\n",
+				err, i);
+			goto err_close;
+		}
+
+		/* Switch starts with all ports configured to VLAN 1. Need to
+		 * remove this setting to allow configuration at bridge join
+		 */
+		vcfg.num_ifs = 1;
+		vcfg.if_id[0] = i;
+		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
+						   DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if_untagged err %d\n",
+				err);
+			goto err_close;
+		}
+
+		tci_cfg.vlan_id = 4095;
+		err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, i, &tci_cfg);
+		if (err) {
+			dev_err(dev, "dpsw_if_set_tci err %d\n", err);
+			goto err_close;
+		}
+
+		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if err %d\n", err);
+			goto err_close;
+		}
+	}
+
+	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, DEFAULT_VLAN_ID);
+	if (err) {
+		dev_err(dev, "dpsw_vlan_remove err %d\n", err);
+		goto err_close;
+	}
+
+	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
+						   WQ_MEM_RECLAIM, "ethsw",
+						   ethsw->sw_attr.id);
+	if (!ethsw->workqueue) {
+		err = -ENOMEM;
+		goto err_close;
+	}
+
+	err = dpsw_fdb_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, 0);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
+	err = dpaa2_switch_ctrl_if_setup(ethsw);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
+	return 0;
+
+err_destroy_ordered_workqueue:
+	destroy_workqueue(ethsw->workqueue);
+
+err_close:
+	dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	return err;
+}
+
+static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = DEFAULT_VLAN_ID,
+		.flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID,
+	};
+	struct net_device *netdev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_fdb_cfg fdb_cfg = {0};
+	struct dpaa2_switch_fdb *fdb;
+	struct dpsw_if_attr dpsw_if_attr;
+	u16 fdb_id;
+	int err;
+
+	/* Get the Tx queue for this specific port */
+	err = dpsw_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     port_priv->idx, &dpsw_if_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_attributes err %d\n", err);
+		return err;
+	}
+	port_priv->tx_qdid = dpsw_if_attr.qdid;
+
+	/* Create a FDB table for this particular switch port */
+	fdb_cfg.num_fdb_entries = ethsw->sw_attr.max_fdb_entries / ethsw->sw_attr.num_ifs;
+	err = dpsw_fdb_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &fdb_id, &fdb_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_fdb_add err %d\n", err);
+		return err;
+	}
+
+	/* Find an unused dpaa2_switch_fdb structure and use it */
+	fdb = dpaa2_switch_fdb_get_unused(ethsw);
+	fdb->fdb_id = fdb_id;
+	fdb->in_use = true;
+	fdb->bridge_dev = NULL;
+	port_priv->fdb = fdb;
+
+	/* We need to add VLAN 1 as the PVID on this port until it is under a
+	 * bridge since the DPAA2 switch is not able to handle the traffic in a
+	 * VLAN unaware fashion
+	 */
+	err = dpaa2_switch_port_vlans_add(netdev, &vlan);
+	if (err)
+		return err;
+
+	/* Setup the egress flooding domains (broadcast, unknown unicast */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	return err;
+}
+
+static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	int err;
+
+	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err)
+		dev_warn(dev, "dpsw_close err %d\n", err);
+}
+
+static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
+{
+	dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	dpaa2_switch_free_dpio(ethsw);
+	dpaa2_switch_destroy_rings(ethsw);
+	dpaa2_switch_drain_bp(ethsw);
+	dpaa2_switch_free_dpbp(ethsw);
+}
+
+static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
+{
+	struct ethsw_port_priv *port_priv;
+	struct ethsw_core *ethsw;
+	struct device *dev;
+	int i;
+
+	dev = &sw_dev->dev;
+	ethsw = dev_get_drvdata(dev);
+
+	dpaa2_switch_ctrl_if_teardown(ethsw);
+
+	dpaa2_switch_teardown_irqs(sw_dev);
+
+	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		port_priv = ethsw->ports[i];
+		unregister_netdev(port_priv->netdev);
+		free_netdev(port_priv->netdev);
+	}
+
+	kfree(ethsw->fdbs);
+	kfree(ethsw->ports);
+
+	dpaa2_switch_takedown(sw_dev);
+
+	destroy_workqueue(ethsw->workqueue);
+
+	fsl_mc_portal_free(ethsw->mc_io);
+
+	kfree(ethsw);
+
+	dev_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
+				   u16 port_idx)
+{
+	struct ethsw_port_priv *port_priv;
+	struct device *dev = ethsw->dev;
+	struct net_device *port_netdev;
+	int err;
+
+	port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
+	if (!port_netdev) {
+		dev_err(dev, "alloc_etherdev error\n");
+		return -ENOMEM;
+	}
+
+	port_priv = netdev_priv(port_netdev);
+	port_priv->netdev = port_netdev;
+	port_priv->ethsw_data = ethsw;
+
+	port_priv->idx = port_idx;
+	port_priv->stp_state = BR_STATE_FORWARDING;
+
+	SET_NETDEV_DEV(port_netdev, dev);
+	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
+	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
+
+	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
+
+	port_priv->bcast_flood = true;
+	port_priv->ucast_flood = true;
+
+	/* Set MTU limits */
+	port_netdev->min_mtu = ETH_MIN_MTU;
+	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
+
+	/* Populate the private port structure so that later calls to
+	 * dpaa2_switch_port_init() can use it.
+	 */
+	ethsw->ports[port_idx] = port_priv;
+
+	/* The DPAA2 switch's ingress path depends on the VLAN table,
+	 * thus we are not able to disable VLAN filtering.
+	 */
+	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER;
+
+	err = dpaa2_switch_port_init(port_priv, port_idx);
+	if (err)
+		goto err_port_probe;
+
+	err = dpaa2_switch_port_set_mac_addr(port_priv);
+	if (err)
+		goto err_port_probe;
+
+	err = dpaa2_switch_port_set_learning(port_priv, false);
+	if (err)
+		goto err_port_probe;
+
+	return 0;
+
+err_port_probe:
+	free_netdev(port_netdev);
+	ethsw->ports[port_idx] = NULL;
+
+	return err;
+}
+
+static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw;
+	int i, err;
+
+	/* Allocate switch core*/
+	ethsw = kzalloc(sizeof(*ethsw), GFP_KERNEL);
+
+	if (!ethsw)
+		return -ENOMEM;
+
+	ethsw->dev = dev;
+	ethsw->iommu_domain = iommu_get_domain_for_dev(dev);
+	dev_set_drvdata(dev, ethsw);
+
+	err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
+				     &ethsw->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "fsl_mc_portal_allocate err %d\n", err);
+		goto err_free_drvdata;
+	}
+
+	err = dpaa2_switch_init(sw_dev);
+	if (err)
+		goto err_free_cmdport;
+
+	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
+			       GFP_KERNEL);
+	if (!(ethsw->ports)) {
+		err = -ENOMEM;
+		goto err_takedown;
+	}
+
+	ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs),
+			      GFP_KERNEL);
+	if (!ethsw->fdbs) {
+		err = -ENOMEM;
+		goto err_free_ports;
+	}
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = dpaa2_switch_probe_port(ethsw, i);
+		if (err)
+			goto err_free_netdev;
+	}
+
+	/* Add a NAPI instance for each of the Rx queues. The first port's
+	 * net_device will be associated with the instances since we do not have
+	 * different queues for each switch ports.
+	 */
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		netif_napi_add(ethsw->ports[0]->netdev,
+			       &ethsw->fq[i].napi, dpaa2_switch_poll,
+			       NAPI_POLL_WEIGHT);
+
+	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
+		goto err_free_netdev;
+	}
+
+	/* Setup IRQs */
+	err = dpaa2_switch_setup_irqs(sw_dev);
+	if (err)
+		goto err_stop;
+
+	/* Register the netdev only when the entire setup is done and the
+	 * switch port interfaces are ready to receive traffic
+	 */
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = register_netdev(ethsw->ports[i]->netdev);
+		if (err < 0) {
+			dev_err(dev, "register_netdev error %d\n", err);
+			goto err_unregister_ports;
+		}
+	}
+
+	return 0;
+
+err_unregister_ports:
+	for (i--; i >= 0; i--)
+		unregister_netdev(ethsw->ports[i]->netdev);
+	dpaa2_switch_teardown_irqs(sw_dev);
+err_stop:
+	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+err_free_netdev:
+	for (i--; i >= 0; i--)
+		free_netdev(ethsw->ports[i]->netdev);
+	kfree(ethsw->fdbs);
+err_free_ports:
+	kfree(ethsw->ports);
+
+err_takedown:
+	dpaa2_switch_takedown(sw_dev);
+
+err_free_cmdport:
+	fsl_mc_portal_free(ethsw->mc_io);
+
+err_free_drvdata:
+	kfree(ethsw);
+	dev_set_drvdata(dev, NULL);
+
+	return err;
+}
+
+static const struct fsl_mc_device_id dpaa2_switch_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpsw",
+	},
+	{ .vendor = 0x0 }
+};
+MODULE_DEVICE_TABLE(fslmc, dpaa2_switch_match_id_table);
+
+static struct fsl_mc_driver dpaa2_switch_drv = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = dpaa2_switch_probe,
+	.remove = dpaa2_switch_remove,
+	.match_id_table = dpaa2_switch_match_id_table
+};
+
+static struct notifier_block dpaa2_switch_port_nb __read_mostly = {
+	.notifier_call = dpaa2_switch_port_netdevice_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_nb = {
+	.notifier_call = dpaa2_switch_port_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb = {
+	.notifier_call = dpaa2_switch_port_blocking_event,
+};
+
+static int dpaa2_switch_register_notifiers(void)
+{
+	int err;
+
+	err = register_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register net_device notifier (%d)\n", err);
+		return err;
+	}
+
+	err = register_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev notifier (%d)\n", err);
+		goto err_switchdev_nb;
+	}
+
+	err = register_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev blocking notifier (%d)\n", err);
+		goto err_switchdev_blocking_nb;
+	}
+
+	return 0;
+
+err_switchdev_blocking_nb:
+	unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+err_switchdev_nb:
+	unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+
+	return err;
+}
+
+static void dpaa2_switch_unregister_notifiers(void)
+{
+	int err;
+
+	err = unregister_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev blocking notifier (%d)\n",
+		       err);
+
+	err = unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev notifier (%d)\n", err);
+
+	err = unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister net_device notifier (%d)\n", err);
+}
+
+static int __init dpaa2_switch_driver_init(void)
+{
+	int err;
+
+	err = fsl_mc_driver_register(&dpaa2_switch_drv);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_register_notifiers();
+	if (err) {
+		fsl_mc_driver_unregister(&dpaa2_switch_drv);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit dpaa2_switch_driver_exit(void)
+{
+	dpaa2_switch_unregister_notifiers();
+	fsl_mc_driver_unregister(&dpaa2_switch_drv);
+}
+
+module_init(dpaa2_switch_driver_init);
+module_exit(dpaa2_switch_driver_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver");
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
new file mode 100644
index 000000000000..549218994243
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DPAA2 Ethernet Switch declarations
+ *
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#ifndef __ETHSW_H
+#define __ETHSW_H
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <uapi/linux/if_bridge.h>
+#include <net/switchdev.h>
+#include <linux/if_bridge.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+
+#include "dpsw.h"
+
+/* Number of IRQs supported */
+#define DPSW_IRQ_NUM	2
+
+/* Port is member of VLAN */
+#define ETHSW_VLAN_MEMBER	1
+/* VLAN to be treated as untagged on egress */
+#define ETHSW_VLAN_UNTAGGED	2
+/* Untagged frames will be assigned to this VLAN */
+#define ETHSW_VLAN_PVID		4
+/* VLAN configured on the switch */
+#define ETHSW_VLAN_GLOBAL	8
+
+/* Maximum Frame Length supported by HW (currently 10k) */
+#define DPAA2_MFL		(10 * 1024)
+#define ETHSW_MAX_FRAME_LENGTH	(DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define ETHSW_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
+
+#define ETHSW_FEATURE_MAC_ADDR	BIT(0)
+
+/* Number of receive queues (one RX and one TX_CONF) */
+#define DPAA2_SWITCH_RX_NUM_FQS	2
+
+/* Hardware requires alignment for ingress/egress buffer addresses */
+#define DPAA2_SWITCH_RX_BUF_RAW_SIZE	PAGE_SIZE
+#define DPAA2_SWITCH_RX_BUF_TAILROOM \
+	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define DPAA2_SWITCH_RX_BUF_SIZE \
+	(DPAA2_SWITCH_RX_BUF_RAW_SIZE - DPAA2_SWITCH_RX_BUF_TAILROOM)
+
+#define DPAA2_SWITCH_STORE_SIZE 16
+
+/* Buffer management */
+#define BUFS_PER_CMD			7
+#define DPAA2_ETHSW_NUM_BUFS		(1024 * BUFS_PER_CMD)
+#define DPAA2_ETHSW_REFILL_THRESH	(DPAA2_ETHSW_NUM_BUFS * 5 / 6)
+
+/* Number of times to retry DPIO portal operations while waiting
+ * for portal to finish executing current command and become
+ * available. We want to avoid being stuck in a while loop in case
+ * hardware becomes unresponsive, but not give up too easily if
+ * the portal really is busy for valid reasons
+ */
+#define DPAA2_SWITCH_SWP_BUSY_RETRIES		1000
+
+/* Hardware annotation buffer size */
+#define DPAA2_SWITCH_HWA_SIZE			64
+/* Software annotation buffer size */
+#define DPAA2_SWITCH_SWA_SIZE			64
+
+#define DPAA2_SWITCH_TX_BUF_ALIGN		64
+
+#define DPAA2_SWITCH_TX_DATA_OFFSET \
+	(DPAA2_SWITCH_HWA_SIZE + DPAA2_SWITCH_SWA_SIZE)
+
+#define DPAA2_SWITCH_NEEDED_HEADROOM \
+	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
+
+extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
+
+struct ethsw_core;
+
+struct dpaa2_switch_fq {
+	struct ethsw_core *ethsw;
+	enum dpsw_queue_type type;
+	struct dpaa2_io_store *store;
+	struct dpaa2_io_notification_ctx nctx;
+	struct napi_struct napi;
+	u32 fqid;
+};
+
+struct dpaa2_switch_fdb {
+	struct net_device	*bridge_dev;
+	u16			fdb_id;
+	bool			in_use;
+};
+
+/* Per port private data */
+struct ethsw_port_priv {
+	struct net_device	*netdev;
+	u16			idx;
+	struct ethsw_core	*ethsw_data;
+	u8			link_state;
+	u8			stp_state;
+
+	u8			vlans[VLAN_VID_MASK + 1];
+	u16			pvid;
+	u16			tx_qdid;
+
+	struct dpaa2_switch_fdb	*fdb;
+	bool			bcast_flood;
+	bool			ucast_flood;
+};
+
+/* Switch data */
+struct ethsw_core {
+	struct device			*dev;
+	struct fsl_mc_io		*mc_io;
+	u16				dpsw_handle;
+	struct dpsw_attr		sw_attr;
+	u16				major, minor;
+	unsigned long			features;
+	int				dev_id;
+	struct ethsw_port_priv		**ports;
+	struct iommu_domain		*iommu_domain;
+
+	u8				vlans[VLAN_VID_MASK + 1];
+
+	struct workqueue_struct		*workqueue;
+
+	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
+	struct fsl_mc_device		*dpbp_dev;
+	int				buf_count;
+	u16				bpid;
+	int				napi_users;
+
+	struct dpaa2_switch_fdb		*fdbs;
+};
+
+static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
+{
+	if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) {
+		dev_err(ethsw->dev, "Control Interface is disabled, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.flooding_cfg != DPSW_FLOODING_PER_FDB) {
+		dev_err(ethsw->dev, "Flooding domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.broadcast_cfg != DPSW_BROADCAST_PER_FDB) {
+		dev_err(ethsw->dev, "Broadcast domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.max_fdbs < ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "The number of FDBs is lower than the number of ports, cannot probe\n");
+		return false;
+	}
+
+	return true;
+}
+
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
+
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
+typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
+				  struct fdb_dump_entry *fdb_entry,
+				  void *data);
+#endif	/* __ETHSW_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpkg.h b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
index 6de613b13e4d..6f596a5fbeeb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpkg.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
@@ -13,11 +13,12 @@
 /** Key Generator properties */
 
 /**
- * Number of masks per key extraction
+ * DPKG_NUM_OF_MASKS - Number of masks per key extraction
  */
 #define DPKG_NUM_OF_MASKS		4
+
 /**
- * Number of extractions per key profile
+ * DPKG_MAX_NUM_OF_EXTRACTS - Number of extractions per key profile
  */
 #define DPKG_MAX_NUM_OF_EXTRACTS	10
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
index 135f143097a5..8f7ceb731282 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpmac.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
@@ -83,39 +83,21 @@ int dpmac_get_attributes(struct fsl_mc_io *mc_io,
 			 u16 token,
 			 struct dpmac_attr *attr);
 
-/**
- * DPMAC link configuration/state options
- */
+/* DPMAC link configuration/state options */
 
-/**
- * Enable auto-negotiation
- */
 #define DPMAC_LINK_OPT_AUTONEG			BIT_ULL(0)
-/**
- * Enable half-duplex mode
- */
 #define DPMAC_LINK_OPT_HALF_DUPLEX		BIT_ULL(1)
-/**
- * Enable pause frames
- */
 #define DPMAC_LINK_OPT_PAUSE			BIT_ULL(2)
-/**
- * Enable a-symmetric pause frames
- */
 #define DPMAC_LINK_OPT_ASYM_PAUSE		BIT_ULL(3)
 
-/**
- * Advertised link speeds
- */
+/* Advertised link speeds */
 #define DPMAC_ADVERTISED_10BASET_FULL		BIT_ULL(0)
 #define DPMAC_ADVERTISED_100BASET_FULL		BIT_ULL(1)
 #define DPMAC_ADVERTISED_1000BASET_FULL		BIT_ULL(2)
 #define DPMAC_ADVERTISED_10000BASET_FULL	BIT_ULL(4)
 #define DPMAC_ADVERTISED_2500BASEX_FULL		BIT_ULL(5)
 
-/**
- * Advertise auto-negotiation enable
- */
+/* Advertise auto-negotiation enable */
 #define DPMAC_ADVERTISED_AUTONEG		BIT_ULL(3)
 
 /**
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index aa429c17c343..d6afada99fb6 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -17,6 +17,8 @@
  * This function has to be called before the following functions:
  *	- dpni_set_rx_tc_dist()
  *	- dpni_set_qos_table()
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, u8 *key_cfg_buf)
 {
@@ -1793,6 +1795,8 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
  * If cfg.enable is set to 0 the command will clear flow steering table.
  * The packets will be classified according to settings made in
  * dpni_set_rx_hash_dist()
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
 			u32 cmd_flags,
@@ -1826,6 +1830,8 @@ int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
  * If cfg.enable is set to 1 the packets will be classified using a hash
  * function based on the key received in cfg.key_cfg_iova parameter.
  * If cfg.enable is set to 0 the packets will be sent to the default queue
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io,
 			  u32 cmd_flags,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 4e96d9362dd2..7de0562bbf59 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -10,73 +10,76 @@
 
 struct fsl_mc_io;
 
-/**
- * Data Path Network Interface API
+/* Data Path Network Interface API
  * Contains initialization APIs and runtime control APIs for DPNI
  */
 
 /** General DPNI macros */
 
 /**
- * Maximum number of traffic classes
+ * DPNI_MAX_TC - Maximum number of traffic classes
  */
 #define DPNI_MAX_TC				8
 /**
- * Maximum number of buffer pools per DPNI
+ * DPNI_MAX_DPBP - Maximum number of buffer pools per DPNI
  */
 #define DPNI_MAX_DPBP				8
 
 /**
- * All traffic classes considered; see dpni_set_queue()
+ * DPNI_ALL_TCS - All traffic classes considered; see dpni_set_queue()
  */
 #define DPNI_ALL_TCS				(u8)(-1)
 /**
- * All flows within traffic class considered; see dpni_set_queue()
+ * DPNI_ALL_TC_FLOWS - All flows within traffic class considered; see
+ * dpni_set_queue()
  */
 #define DPNI_ALL_TC_FLOWS			(u16)(-1)
 /**
- * Generate new flow ID; see dpni_set_queue()
+ * DPNI_NEW_FLOW_ID - Generate new flow ID; see dpni_set_queue()
  */
 #define DPNI_NEW_FLOW_ID			(u16)(-1)
 
 /**
- * Tx traffic is always released to a buffer pool on transmit, there are no
- * resources allocated to have the frames confirmed back to the source after
- * transmission.
+ * DPNI_OPT_TX_FRM_RELEASE - Tx traffic is always released to a buffer pool on
+ * transmit, there are no resources allocated to have the frames confirmed back
+ * to the source after transmission.
  */
 #define DPNI_OPT_TX_FRM_RELEASE			0x000001
 /**
- * Disables support for MAC address filtering for addresses other than primary
- * MAC address. This affects both unicast and multicast. Promiscuous mode can
- * still be enabled/disabled for both unicast and multicast. If promiscuous mode
- * is disabled, only traffic matching the primary MAC address will be accepted.
+ * DPNI_OPT_NO_MAC_FILTER - Disables support for MAC address filtering for
+ * addresses other than primary MAC address. This affects both unicast and
+ * multicast. Promiscuous mode can still be enabled/disabled for both unicast
+ * and multicast. If promiscuous mode is disabled, only traffic matching the
+ * primary MAC address will be accepted.
  */
 #define DPNI_OPT_NO_MAC_FILTER			0x000002
 /**
- * Allocate policers for this DPNI. They can be used to rate-limit traffic per
- * traffic class (TC) basis.
+ * DPNI_OPT_HAS_POLICING - Allocate policers for this DPNI. They can be used to
+ * rate-limit traffic per traffic class (TC) basis.
  */
 #define DPNI_OPT_HAS_POLICING			0x000004
 /**
- * Congestion can be managed in several ways, allowing the buffer pool to
- * deplete on ingress, taildrop on each queue or use congestion groups for sets
- * of queues. If set, it configures a single congestion groups across all TCs.
- * If reset, a congestion group is allocated for each TC. Only relevant if the
- * DPNI has multiple traffic classes.
+ * DPNI_OPT_SHARED_CONGESTION - Congestion can be managed in several ways,
+ * allowing the buffer pool to deplete on ingress, taildrop on each queue or
+ * use congestion groups for sets of queues. If set, it configures a single
+ * congestion groups across all TCs.  If reset, a congestion group is allocated
+ * for each TC. Only relevant if the DPNI has multiple traffic classes.
  */
 #define DPNI_OPT_SHARED_CONGESTION		0x000008
 /**
- * Enables TCAM for Flow Steering and QoS look-ups. If not specified, all
- * look-ups are exact match. Note that TCAM is not available on LS1088 and its
- * variants. Setting this bit on these SoCs will trigger an error.
+ * DPNI_OPT_HAS_KEY_MASKING - Enables TCAM for Flow Steering and QoS look-ups.
+ * If not specified, all look-ups are exact match. Note that TCAM is not
+ * available on LS1088 and its variants. Setting this bit on these SoCs will
+ * trigger an error.
  */
 #define DPNI_OPT_HAS_KEY_MASKING		0x000010
 /**
- * Disables the flow steering table.
+ * DPNI_OPT_NO_FS - Disables the flow steering table.
  */
 #define DPNI_OPT_NO_FS				0x000020
 /**
- * Flow steering table is shared between all traffic classes
+ * DPNI_OPT_SHARED_FS - Flow steering table is shared between all traffic
+ * classes
  */
 #define DPNI_OPT_SHARED_FS			0x001000
 
@@ -129,20 +132,14 @@ int dpni_reset(struct fsl_mc_io	*mc_io,
 	       u32		cmd_flags,
 	       u16		token);
 
-/**
- * DPNI IRQ Index and Events
- */
+/* DPNI IRQ Index and Events */
 
-/**
- * IRQ index
- */
 #define DPNI_IRQ_INDEX				0
-/**
- * IRQ events:
- *       indicates a change in link state
- *       indicates a change in endpoint
- */
+
+/* DPNI_IRQ_EVENT_LINK_CHANGED - indicates a change in link state */
 #define DPNI_IRQ_EVENT_LINK_CHANGED		0x00000001
+
+/* DPNI_IRQ_EVENT_ENDPOINT_CHANGED - indicates a change in endpoint */
 #define DPNI_IRQ_EVENT_ENDPOINT_CHANGED		0x00000002
 
 int dpni_set_irq_enable(struct fsl_mc_io	*mc_io,
@@ -222,32 +219,30 @@ int dpni_get_attributes(struct fsl_mc_io	*mc_io,
 			u16			token,
 			struct dpni_attr	*attr);
 
-/**
- * DPNI errors
- */
+/* DPNI errors */
 
 /**
- * Extract out of frame header error
+ * DPNI_ERROR_EOFHE - Extract out of frame header error
  */
 #define DPNI_ERROR_EOFHE	0x00020000
 /**
- * Frame length error
+ * DPNI_ERROR_FLE - Frame length error
  */
 #define DPNI_ERROR_FLE		0x00002000
 /**
- * Frame physical error
+ * DPNI_ERROR_FPE - Frame physical error
  */
 #define DPNI_ERROR_FPE		0x00001000
 /**
- * Parsing header error
+ * DPNI_ERROR_PHE - Parsing header error
  */
 #define DPNI_ERROR_PHE		0x00000020
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L3CE - Parser L3 checksum error
  */
 #define DPNI_ERROR_L3CE		0x00000004
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L4CE - Parser L3 checksum error
  */
 #define DPNI_ERROR_L4CE		0x00000001
 
@@ -281,36 +276,35 @@ int dpni_set_errors_behavior(struct fsl_mc_io		*mc_io,
 			     u16			token,
 			     struct dpni_error_cfg	*cfg);
 
-/**
- * DPNI buffer layout modification options
- */
+/* DPNI buffer layout modification options */
 
 /**
- * Select to modify the time-stamp setting
+ * DPNI_BUF_LAYOUT_OPT_TIMESTAMP - Select to modify the time-stamp setting
  */
 #define DPNI_BUF_LAYOUT_OPT_TIMESTAMP		0x00000001
 /**
- * Select to modify the parser-result setting; not applicable for Tx
+ * DPNI_BUF_LAYOUT_OPT_PARSER_RESULT - Select to modify the parser-result
+ * setting; not applicable for Tx
  */
 #define DPNI_BUF_LAYOUT_OPT_PARSER_RESULT	0x00000002
 /**
- * Select to modify the frame-status setting
+ * DPNI_BUF_LAYOUT_OPT_FRAME_STATUS - Select to modify the frame-status setting
  */
 #define DPNI_BUF_LAYOUT_OPT_FRAME_STATUS	0x00000004
 /**
- * Select to modify the private-data-size setting
+ * DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE - Select to modify the private-data-size setting
  */
 #define DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE	0x00000008
 /**
- * Select to modify the data-alignment setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_ALIGN - Select to modify the data-alignment setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_ALIGN		0x00000010
 /**
- * Select to modify the data-head-room setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM - Select to modify the data-head-room setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM	0x00000020
 /**
- * Select to modify the data-tail-room setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM - Select to modify the data-tail-room setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM	0x00000040
 
@@ -343,7 +337,8 @@ struct dpni_buffer_layout {
  * @DPNI_QUEUE_TX: Tx queue
  * @DPNI_QUEUE_TX_CONFIRM: Tx confirmation queue
  * @DPNI_QUEUE_RX_ERR: Rx error queue
- */enum dpni_queue_type {
+ */
+enum dpni_queue_type {
 	DPNI_QUEUE_RX,
 	DPNI_QUEUE_TX,
 	DPNI_QUEUE_TX_CONFIRM,
@@ -424,7 +419,7 @@ int dpni_get_tx_data_offset(struct fsl_mc_io	*mc_io,
  *	lack of buffers
  * @page_2.egress_discarded_frames: Egress discarded frame count
  * @page_2.egress_confirmed_frames: Egress confirmed frame count
- * @page3: Page_3 statistics structure
+ * @page_3: Page_3 statistics structure
  * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
  *	dequeued from egress FQs
  * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
@@ -501,30 +496,14 @@ int dpni_get_statistics(struct fsl_mc_io	*mc_io,
 			u8			page,
 			union dpni_statistics	*stat);
 
-/**
- * Enable auto-negotiation
- */
 #define DPNI_LINK_OPT_AUTONEG		0x0000000000000001ULL
-/**
- * Enable half-duplex mode
- */
 #define DPNI_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
-/**
- * Enable pause frames
- */
 #define DPNI_LINK_OPT_PAUSE		0x0000000000000004ULL
-/**
- * Enable a-symmetric pause frames
- */
 #define DPNI_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
-
-/**
- * Enable priority flow control pause frames
- */
 #define DPNI_LINK_OPT_PFC_PAUSE		0x0000000000000010ULL
 
 /**
- * struct - Structure representing DPNI link configuration
+ * struct dpni_link_cfg - Structure representing DPNI link configuration
  * @rate: Rate
  * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
  */
@@ -687,8 +666,8 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io			*mc_io,
 			const struct dpni_rx_tc_dist_cfg	*cfg);
 
 /**
- * When used for fs_miss_flow_id in function dpni_set_rx_dist,
- * will signal to dpni to drop all unclassified frames
+ * DPNI_FS_MISS_DROP - When used for fs_miss_flow_id in function
+ * dpni_set_rx_dist, will signal to dpni to drop all unclassified frames
  */
 #define DPNI_FS_MISS_DROP		((uint16_t)-1)
 
@@ -766,7 +745,7 @@ enum dpni_dest {
 
 /**
  * struct dpni_queue - Queue structure
- * @destination - Destination structure
+ * @destination: - Destination structure
  * @destination.id: ID of the destination, only relevant if DEST_TYPE is > 0.
  *	Identifies either a DPIO or a DPCON object.
  *	Not relevant for Tx queues.
@@ -837,9 +816,7 @@ struct dpni_queue_id {
 	u16 qdbin;
 };
 
-/**
- * Set User Context
- */
+/* Set User Context */
 #define DPNI_QUEUE_OPT_USER_CTX		0x00000001
 #define DPNI_QUEUE_OPT_DEST		0x00000002
 #define DPNI_QUEUE_OPT_FLC		0x00000004
@@ -904,9 +881,9 @@ struct dpni_dest_cfg {
 /* DPNI congestion options */
 
 /**
- * This congestion will trigger flow control or priority flow control.
- * This will have effect only if flow control is enabled with
- * dpni_set_link_cfg().
+ * DPNI_CONG_OPT_FLOW_CONTROL - This congestion will trigger flow control or
+ * priority flow control.  This will have effect only if flow control is
+ * enabled with dpni_set_link_cfg().
  */
 #define DPNI_CONG_OPT_FLOW_CONTROL		0x00000040
 
@@ -990,23 +967,24 @@ struct dpni_rule_cfg {
 };
 
 /**
- * Discard matching traffic. If set, this takes precedence over any other
- * configuration and matching traffic is always discarded.
+ * DPNI_FS_OPT_DISCARD - Discard matching traffic. If set, this takes
+ * precedence over any other configuration and matching traffic is always
+ * discarded.
  */
  #define DPNI_FS_OPT_DISCARD            0x1
 
 /**
- * Set FLC value. If set, flc member of struct dpni_fs_action_cfg is used to
- * override the FLC value set per queue.
+ * DPNI_FS_OPT_SET_FLC - Set FLC value. If set, flc member of struct
+ * dpni_fs_action_cfg is used to override the FLC value set per queue.
  * For more details check the Frame Descriptor section in the hardware
  * documentation.
  */
 #define DPNI_FS_OPT_SET_FLC            0x2
 
 /**
- * Indicates whether the 6 lowest significant bits of FLC are used for stash
- * control. If set, the 6 least significant bits in value are interpreted as
- * follows:
+ * DPNI_FS_OPT_SET_STASH_CONTROL - Indicates whether the 6 lowest significant
+ * bits of FLC are used for stash control. If set, the 6 least significant bits
+ * in value are interpreted as follows:
  *     - bits 0-1: indicates the number of 64 byte units of context that are
  *     stashed. FLC value is interpreted as a memory address in this case,
  *     excluding the 6 LS bits.
@@ -1068,7 +1046,7 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
 			 u16 *major_ver,
 			 u16 *minor_ver);
 /**
- * struct dpni_tx_shaping - Structure representing DPNI tx shaping configuration
+ * struct dpni_tx_shaping_cfg - Structure representing DPNI tx shaping configuration
  * @rate_limit:		Rate in Mbps
  * @max_burst_size:	Burst size in bytes (up to 64KB)
  */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index 05c413719e55..01d77c685a5b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -13,9 +13,6 @@
 
 struct fsl_mc_io;
 
-/**
- * Number of irq's
- */
 #define DPRTC_MAX_IRQ_NUM	1
 #define DPRTC_IRQ_INDEX		0
 
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 450841cc6ca8..24b17d6e09af 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -1,16 +1,18 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2020 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
 #ifndef __FSL_DPSW_CMD_H
 #define __FSL_DPSW_CMD_H
 
+#include "dpsw.h"
+
 /* DPSW Version */
 #define DPSW_VER_MAJOR		8
-#define DPSW_VER_MINOR		5
+#define DPSW_VER_MINOR		9
 
 #define DPSW_CMD_BASE_VERSION	1
 #define DPSW_CMD_VERSION_2	2
@@ -27,7 +29,7 @@
 
 #define DPSW_CMDID_ENABLE                   DPSW_CMD_ID(0x002)
 #define DPSW_CMDID_DISABLE                  DPSW_CMD_ID(0x003)
-#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_ID(0x004)
+#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_V2(0x004)
 #define DPSW_CMDID_RESET                    DPSW_CMD_ID(0x005)
 
 #define DPSW_CMDID_SET_IRQ_ENABLE           DPSW_CMD_ID(0x012)
@@ -45,18 +47,18 @@
 #define DPSW_CMDID_IF_ENABLE                DPSW_CMD_ID(0x03D)
 #define DPSW_CMDID_IF_DISABLE               DPSW_CMD_ID(0x03E)
 
+#define DPSW_CMDID_IF_GET_ATTR              DPSW_CMD_ID(0x042)
+
 #define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH  DPSW_CMD_ID(0x044)
 
 #define DPSW_CMDID_IF_GET_LINK_STATE        DPSW_CMD_ID(0x046)
-#define DPSW_CMDID_IF_SET_FLOODING          DPSW_CMD_ID(0x047)
-#define DPSW_CMDID_IF_SET_BROADCAST         DPSW_CMD_ID(0x048)
 
 #define DPSW_CMDID_IF_GET_TCI               DPSW_CMD_ID(0x04A)
 
 #define DPSW_CMDID_IF_SET_LINK_CFG          DPSW_CMD_ID(0x04C)
 
 #define DPSW_CMDID_VLAN_ADD                 DPSW_CMD_ID(0x060)
-#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_ID(0x061)
+#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_V2(0x061)
 #define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED     DPSW_CMD_ID(0x062)
 
 #define DPSW_CMDID_VLAN_REMOVE_IF           DPSW_CMD_ID(0x064)
@@ -64,16 +66,24 @@
 #define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING  DPSW_CMD_ID(0x066)
 #define DPSW_CMDID_VLAN_REMOVE              DPSW_CMD_ID(0x067)
 
+#define DPSW_CMDID_FDB_ADD                  DPSW_CMD_ID(0x082)
+#define DPSW_CMDID_FDB_REMOVE               DPSW_CMD_ID(0x083)
 #define DPSW_CMDID_FDB_ADD_UNICAST          DPSW_CMD_ID(0x084)
 #define DPSW_CMDID_FDB_REMOVE_UNICAST       DPSW_CMD_ID(0x085)
 #define DPSW_CMDID_FDB_ADD_MULTICAST        DPSW_CMD_ID(0x086)
 #define DPSW_CMDID_FDB_REMOVE_MULTICAST     DPSW_CMD_ID(0x087)
-#define DPSW_CMDID_FDB_SET_LEARNING_MODE    DPSW_CMD_ID(0x088)
 #define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
 
 #define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
-#define DPSW_CMDID_IF_GET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A8)
-#define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
+
+#define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
+#define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
+#define DPSW_CMDID_CTRL_IF_ENABLE           DPSW_CMD_ID(0x0A2)
+#define DPSW_CMDID_CTRL_IF_DISABLE          DPSW_CMD_ID(0x0A3)
+#define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
+
+#define DPSW_CMDID_SET_EGRESS_FLOOD         DPSW_CMD_ID(0x0AC)
+#define DPSW_CMDID_IF_SET_LEARNING_MODE     DPSW_CMD_ID(0x0AD)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
@@ -169,6 +179,12 @@ struct dpsw_cmd_clear_irq_status {
 #define DPSW_COMPONENT_TYPE_SHIFT	0
 #define DPSW_COMPONENT_TYPE_SIZE	4
 
+#define DPSW_FLOODING_CFG_SHIFT		0
+#define DPSW_FLOODING_CFG_SIZE		4
+
+#define DPSW_BROADCAST_CFG_SHIFT	4
+#define DPSW_BROADCAST_CFG_SIZE		4
+
 struct dpsw_rsp_get_attr {
 	/* cmd word 0 */
 	__le16 num_ifs;
@@ -186,23 +202,15 @@ struct dpsw_rsp_get_attr {
 	u8 max_meters_per_if;
 	/* from LSB only the first 4 bits */
 	u8 component_type;
-	__le16 pad;
+	/* [0:3] - flooding configuration
+	 * [4:7] - broadcast configuration
+	 */
+	u8 repl_cfg;
+	u8 pad;
 	/* cmd word 3 */
 	__le64 options;
 };
 
-struct dpsw_cmd_if_set_flooding {
-	__le16 if_id;
-	/* from LSB: enable:1 */
-	u8 enable;
-};
-
-struct dpsw_cmd_if_set_broadcast {
-	__le16 if_id;
-	/* from LSB: enable:1 */
-	u8 enable;
-};
-
 #define DPSW_VLAN_ID_SHIFT	0
 #define DPSW_VLAN_ID_SIZE	12
 #define DPSW_DEI_SHIFT		12
@@ -255,6 +263,28 @@ struct dpsw_cmd_if {
 	__le16 if_id;
 };
 
+#define DPSW_ADMIT_UNTAGGED_SHIFT	0
+#define DPSW_ADMIT_UNTAGGED_SIZE	4
+#define DPSW_ENABLED_SHIFT		5
+#define DPSW_ENABLED_SIZE		1
+#define DPSW_ACCEPT_ALL_VLAN_SHIFT	6
+#define DPSW_ACCEPT_ALL_VLAN_SIZE	1
+
+struct dpsw_rsp_if_get_attr {
+	/* cmd word 0 */
+	/* from LSB: admit_untagged:4 enabled:1 accept_all_vlan:1 */
+	u8 conf;
+	u8 pad1;
+	u8 num_tcs;
+	u8 pad2;
+	__le16 qdid;
+	/* cmd word 1 */
+	__le32 options;
+	__le32 pad3;
+	/* cmd word 2 */
+	__le32 rate;
+};
+
 struct dpsw_cmd_if_set_max_frame_length {
 	__le16 if_id;
 	__le16 frame_length;
@@ -295,13 +325,23 @@ struct dpsw_vlan_add {
 	__le16 vlan_id;
 };
 
+struct dpsw_cmd_vlan_add_if {
+	/* cmd word 0 */
+	__le16 options;
+	__le16 vlan_id;
+	__le16 fdb_id;
+	__le16 pad0;
+	/* cmd word 1-4 */
+	__le64 if_id;
+};
+
 struct dpsw_cmd_vlan_manage_if {
 	/* cmd word 0 */
 	__le16 pad0;
 	__le16 vlan_id;
 	__le32 pad1;
 	/* cmd word 1-4 */
-	__le64 if_id[4];
+	__le64 if_id;
 };
 
 struct dpsw_cmd_vlan_remove {
@@ -311,7 +351,7 @@ struct dpsw_cmd_vlan_remove {
 
 struct dpsw_cmd_fdb_add {
 	__le32 pad;
-	__le16 fdb_aging_time;
+	__le16 fdb_ageing_time;
 	__le16 num_fdb_entries;
 };
 
@@ -347,16 +387,7 @@ struct dpsw_cmd_fdb_multicast_op {
 	u8 mac_addr[6];
 	__le16 pad2;
 	/* cmd word 2-5 */
-	__le64 if_id[4];
-};
-
-#define DPSW_LEARNING_MODE_SHIFT	0
-#define DPSW_LEARNING_MODE_SIZE		4
-
-struct dpsw_cmd_fdb_set_learning_mode {
-	__le16 fdb_id;
-	/* only the first 4 bits from LSB */
-	u8 mode;
+	__le64 if_id;
 };
 
 struct dpsw_cmd_fdb_dump {
@@ -371,6 +402,36 @@ struct dpsw_rsp_fdb_dump {
 	__le16 num_entries;
 };
 
+struct dpsw_rsp_ctrl_if_get_attr {
+	__le64 pad;
+	__le32 rx_fqid;
+	__le32 rx_err_fqid;
+	__le32 tx_err_conf_fqid;
+};
+
+#define DPSW_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))
+struct dpsw_cmd_ctrl_if_set_pools {
+	u8 num_dpbp;
+	u8 backup_pool_mask;
+	__le16 pad;
+	__le32 dpbp_id[DPSW_MAX_DPBP];
+	__le16 buffer_size[DPSW_MAX_DPBP];
+};
+
+#define DPSW_DEST_TYPE_SHIFT	0
+#define DPSW_DEST_TYPE_SIZE	4
+
+struct dpsw_cmd_ctrl_if_set_queue {
+	__le32 dest_id;
+	u8 dest_priority;
+	u8 pad;
+	/* from LSB: dest_type:4 */
+	u8 dest_type;
+	u8 qtype;
+	__le64 user_ctx;
+	__le32 options;
+};
+
 struct dpsw_rsp_get_api_version {
 	__le16 version_major;
 	__le16 version_minor;
@@ -381,10 +442,20 @@ struct dpsw_rsp_if_get_mac_addr {
 	u8 mac_addr[6];
 };
 
-struct dpsw_cmd_if_set_mac_addr {
-	__le16 if_id;
-	u8 mac_addr[6];
+struct dpsw_cmd_set_egress_flood {
+	__le16 fdb_id;
+	u8 flood_type;
+	u8 pad[5];
+	__le64 if_id;
 };
 
+#define DPSW_LEARNING_MODE_SHIFT	0
+#define DPSW_LEARNING_MODE_SIZE		4
+
+struct dpsw_cmd_if_set_learning_mode {
+	__le16 if_id;
+	/* only the first 4 bits from LSB */
+	u8 mode;
+};
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index f8bfe779bd30..6c787d4b85f9 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -9,9 +9,7 @@
 #include "dpsw.h"
 #include "dpsw-cmd.h"
 
-static void build_if_id_bitmap(__le64 *bmap,
-			       const u16 *id,
-			       const u16 num_ifs)
+static void build_if_id_bitmap(__le64 *bmap, const u16 *id, const u16 num_ifs)
 {
 	int i;
 
@@ -38,10 +36,7 @@ static void build_if_id_bitmap(__le64 *bmap,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token)
+int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_open *cmd_params;
@@ -76,9 +71,7 @@ int dpsw_open(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
+int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -99,9 +92,7 @@ int dpsw_close(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token)
+int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -122,9 +113,7 @@ int dpsw_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token)
+int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -145,9 +134,7 @@ int dpsw_disable(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
+int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -175,11 +162,8 @@ int dpsw_reset(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en)
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u8 en)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_set_irq_enable *cmd_params;
@@ -212,11 +196,8 @@ int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask)
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      u8 irq_index, u32 mask)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_set_irq_mask *cmd_params;
@@ -245,11 +226,8 @@ int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status)
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u32 *status)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_get_irq_status *cmd_params;
@@ -288,11 +266,8 @@ int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status)
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u8 irq_index, u32 status)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_clear_irq_status *cmd_params;
@@ -318,9 +293,7 @@ int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
+int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			struct dpsw_attr *attr)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -351,9 +324,9 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
 	attr->max_fdb_mc_groups = le16_to_cpu(rsp_params->max_fdb_mc_groups);
 	attr->max_meters_per_if = rsp_params->max_meters_per_if;
 	attr->options = le64_to_cpu(rsp_params->options);
-	attr->component_type = dpsw_get_field(rsp_params->component_type,
-					      COMPONENT_TYPE);
-
+	attr->component_type = dpsw_get_field(rsp_params->component_type, COMPONENT_TYPE);
+	attr->flooding_cfg = dpsw_get_field(rsp_params->repl_cfg, FLOODING_CFG);
+	attr->broadcast_cfg = dpsw_get_field(rsp_params->repl_cfg, BROADCAST_CFG);
 	return 0;
 }
 
@@ -367,10 +340,7 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 			 struct dpsw_link_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -397,13 +367,10 @@ int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
  * @if_id:	Interface id
  * @state:	Link state	1 - linkup, 0 - link down or disconnected
  *
- * @Return	'0' on Success; Error code otherwise.
+ * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state)
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_link_state *state)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_get_link_state *cmd_params;
@@ -432,68 +399,6 @@ int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dpsw_if_set_flooding() - Enable Disable flooding for particular interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @en:		1 - enable, 0 - disable
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_flooding(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 u8 en)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_flooding *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_FLOODING,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_flooding *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(cmd_params->enable, ENABLE, en);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_set_broadcast() - Enable/disable broadcast for particular interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @en:		1 - enable, 0 - disable
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u16 if_id,
-			  u8 en)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_broadcast *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_BROADCAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_broadcast *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(cmd_params->enable, ENABLE, en);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
  * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI)
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
@@ -503,10 +408,7 @@ int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_tci_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -538,10 +440,7 @@ int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    struct dpsw_tci_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -583,10 +482,7 @@ int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_stp_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -616,12 +512,8 @@ int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter)
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 if_id, enum dpsw_counter type, u64 *counter)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_get_counter *cmd_params;
@@ -657,10 +549,7 @@ int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id)
+int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if *cmd_params;
@@ -685,10 +574,7 @@ int dpsw_if_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id)
+int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if *cmd_params;
@@ -705,6 +591,47 @@ int dpsw_if_disable(struct fsl_mc_io *mc_io,
 }
 
 /**
+ * dpsw_if_get_attributes() - Function obtains attributes of interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @attr:	Returned interface attributes
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr)
+{
+	struct dpsw_rsp_if_get_attr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_if_get_attr *)cmd.params;
+	attr->num_tcs = rsp_params->num_tcs;
+	attr->rate = le32_to_cpu(rsp_params->rate);
+	attr->options = le32_to_cpu(rsp_params->options);
+	attr->qdid = le16_to_cpu(rsp_params->qdid);
+	attr->enabled = dpsw_get_field(rsp_params->conf, ENABLED);
+	attr->accept_all_vlan = dpsw_get_field(rsp_params->conf,
+					       ACCEPT_ALL_VLAN);
+	attr->admit_untagged = dpsw_get_field(rsp_params->conf,
+					      ADMIT_UNTAGGED);
+
+	return 0;
+}
+
+/**
  * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length.
  * @mc_io:		Pointer to MC portal's I/O object
  * @cmd_flags:		Command flags; one or more of 'MC_CMD_FLAG_'
@@ -714,11 +641,8 @@ int dpsw_if_disable(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length)
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 if_id, u16 frame_length)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_set_max_frame_length *cmd_params;
@@ -752,11 +676,8 @@ int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg)
+int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		  u16 vlan_id, const struct dpsw_vlan_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_vlan_add *cmd_params;
@@ -788,22 +709,21 @@ int dpsw_vlan_add(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		     u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
+	struct dpsw_cmd_vlan_add_if *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
 
 	/* prepare command */
 	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params = (struct dpsw_cmd_vlan_add_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	cmd_params->options = cpu_to_le16(cfg->options);
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -826,11 +746,8 @@ int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -841,7 +758,7 @@ int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -860,11 +777,8 @@ int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -875,7 +789,7 @@ int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -896,11 +810,8 @@ int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -911,7 +822,7 @@ int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -926,9 +837,7 @@ int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 		     u16 vlan_id)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -946,6 +855,66 @@ int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
 }
 
 /**
+ * dpsw_fdb_add() - Add FDB to switch and Returns handle to FDB table for
+ *		the reference
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Returned Forwarding Database Identifier
+ * @cfg:	FDB Configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg)
+{
+	struct dpsw_cmd_fdb_add *cmd_params;
+	struct dpsw_rsp_fdb_add *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_add *)cmd.params;
+	cmd_params->fdb_ageing_time = cpu_to_le16(cfg->fdb_ageing_time);
+	cmd_params->num_fdb_entries = cpu_to_le16(cfg->num_fdb_entries);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_fdb_add *)cmd.params;
+	*fdb_id = le16_to_cpu(rsp_params->fdb_id);
+
+	return 0;
+}
+
+/**
+ * dpsw_fdb_remove() - Remove FDB from switch
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id)
+{
+	struct dpsw_cmd_fdb_remove *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_remove *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
  * dpsw_fdb_add_unicast() - Function adds an unicast entry into MAC lookup table
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
@@ -955,11 +924,8 @@ int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg)
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			 u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_unicast_op *cmd_params;
@@ -998,13 +964,8 @@ int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
  * The struct fdb_dump_entry array must be parsed until the end of memory
  * area or until an entry with mac_addr set to zero is found.
  */
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries)
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id,
+		  u64 iova_addr, u32 iova_size, u16 *num_entries)
 {
 	struct dpsw_cmd_fdb_dump *cmd_params;
 	struct dpsw_rsp_fdb_dump *rsp_params;
@@ -1041,11 +1002,8 @@ int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg)
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			    u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_unicast_op *cmd_params;
@@ -1083,11 +1041,8 @@ int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg)
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_multicast_op *cmd_params;
@@ -1101,7 +1056,7 @@ int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
 	cmd_params->fdb_id = cpu_to_le16(fdb_id);
 	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
 	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 	for (i = 0; i < 6; i++)
 		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
 
@@ -1125,11 +1080,8 @@ int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg)
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_multicast_op *cmd_params;
@@ -1143,7 +1095,7 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
 	cmd_params->fdb_id = cpu_to_le16(fdb_id);
 	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
 	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 	for (i = 0; i < 6; i++)
 		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
 
@@ -1152,33 +1104,97 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dpsw_fdb_set_learning_mode() - Define FDB learning mode
+ * dpsw_ctrl_if_get_attributes() - Obtain control interface attributes
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @mode:	Learning mode
+ * @attr:	Returned control interface attributes
  *
- * Return:	Completion status. '0' on Success; Error code otherwise.
+ * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io,
-			       u32 cmd_flags,
-			       u16 token,
-			       u16 fdb_id,
-			       enum dpsw_fdb_learning_mode mode)
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr)
 {
+	struct dpsw_rsp_ctrl_if_get_attr *rsp_params;
 	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_set_learning_mode *cmd_params;
+	int err;
 
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_SET_LEARNING_MODE,
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR,
+					  cmd_flags, token);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_ctrl_if_get_attr *)cmd.params;
+	attr->rx_fqid = le32_to_cpu(rsp_params->rx_fqid);
+	attr->rx_err_fqid = le32_to_cpu(rsp_params->rx_err_fqid);
+	attr->tx_err_conf_fqid = le32_to_cpu(rsp_params->tx_err_conf_fqid);
+
+	return 0;
+}
+
+/**
+ * dpsw_ctrl_if_set_pools() - Set control interface buffer pools
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Buffer pools configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_pools *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int i;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS,
+					  cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_pools *)cmd.params;
+	cmd_params->num_dpbp = cfg->num_dpbp;
+	for (i = 0; i < DPSW_MAX_DPBP; i++) {
+		cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id);
+		cmd_params->buffer_size[i] =
+			cpu_to_le16(cfg->pools[i].buffer_size);
+		cmd_params->backup_pool_mask |=
+			DPSW_BACKUP_POOL(cfg->pools[i].backup_pool, i);
+	}
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_set_queue() - Set Rx queue configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of dpsw object
+ * @qtype:	dpsw_queue_type of the targeted queue
+ * @cfg:	Rx queue configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_QUEUE,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dpsw_cmd_fdb_set_learning_mode *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	dpsw_set_field(cmd_params->mode, LEARNING_MODE, mode);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_queue *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->dest_priority = cfg->dest_cfg.priority;
+	cmd_params->qtype = qtype;
+	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+	cmd_params->options = cpu_to_le32(cfg->options);
+	dpsw_set_field(cmd_params->dest_type,
+		       DEST_TYPE,
+		       cfg->dest_cfg.dest_type);
 
-	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
 
@@ -1191,10 +1207,8 @@ int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io,
  *
  * Return:  '0' on Success; Error code otherwise.
  */
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver)
+int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			 u16 *major_ver, u16 *minor_ver)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_rsp_get_api_version *rsp_params;
@@ -1216,7 +1230,7 @@ int dpsw_get_api_version(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dpsw_if_get_port_mac_addr()
+ * dpsw_if_get_port_mac_addr() - Retrieve MAC address associated to the physical port
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPSW object
@@ -1254,69 +1268,89 @@ int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 }
 
 /**
- * dpsw_if_get_primary_mac_addr()
+ * dpsw_ctrl_if_enable() - Enable control interface
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
  *
- * Return:	Completion status. '0' on Success; Error code otherwise.
+ * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
-	struct dpsw_rsp_if_get_mac_addr *rsp_params;
 	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-	int err, i;
 
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE, cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_disable() - Function disables control interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
 
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
+	return mc_send_command(mc_io, &cmd);
+}
 
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
-	for (i = 0; i < 6; i++)
-		mac_addr[5 - i] = rsp_params->mac_addr[i];
+/**
+ * dpsw_set_egress_flood() - Set egress parameters associated with an FDB ID
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Egress flooding configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg)
+{
+	struct dpsw_cmd_set_egress_flood *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
 
-	return 0;
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_EGRESS_FLOOD, cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_set_egress_flood *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	cmd_params->flood_type = cfg->flood_type;
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	return mc_send_command(mc_io, &cmd);
 }
 
 /**
- * dpsw_if_set_primary_mac_addr()
+ * dpsw_if_set_learning_mode() - Configure the learning mode on an interface.
+ * If this API is used, it will take precedence over the FDB configuration.
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
+ * @if_id:	InterfaceID
+ * @mode:	Learning mode
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
+int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, enum dpsw_learning_mode mode)
 {
-	struct dpsw_cmd_if_set_mac_addr *cmd_params;
+	struct dpsw_cmd_if_set_learning_mode *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
-	int i;
 
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LEARNING_MODE,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dpsw_cmd_if_set_mac_addr *)cmd.params;
+	cmd_params = (struct dpsw_cmd_if_set_learning_mode *)cmd.params;
 	cmd_params->if_id = cpu_to_le16(if_id);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = mac_addr[5 - i];
+	dpsw_set_field(cmd_params->mode, LEARNING_MODE, mode);
 
-	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
new file mode 100644
index 000000000000..96837b10cc94
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#ifndef __FSL_DPSW_H
+#define __FSL_DPSW_H
+
+/* Data Path L2-Switch API
+ * Contains API for handling DPSW topology and functionality
+ */
+
+struct fsl_mc_io;
+
+/* DPSW general definitions */
+
+#define DPSW_MAX_PRIORITIES	8
+
+#define DPSW_MAX_IF		64
+
+int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token);
+
+int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+/* DPSW options */
+
+/**
+ * DPSW_OPT_FLOODING_DIS - Flooding was disabled at device create
+ */
+#define DPSW_OPT_FLOODING_DIS		0x0000000000000001ULL
+/**
+ * DPSW_OPT_MULTICAST_DIS - Multicast was disabled at device create
+ */
+#define DPSW_OPT_MULTICAST_DIS		0x0000000000000004ULL
+/**
+ * DPSW_OPT_CTRL_IF_DIS - Control interface support is disabled
+ */
+#define DPSW_OPT_CTRL_IF_DIS		0x0000000000000010ULL
+
+/**
+ * enum dpsw_component_type - component type of a bridge
+ * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an
+ *   enterprise VLAN bridge or of a Provider Bridge used
+ *   to process C-tagged frames
+ * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a
+ *   Provider Bridge
+ *
+ */
+enum dpsw_component_type {
+	DPSW_COMPONENT_TYPE_C_VLAN = 0,
+	DPSW_COMPONENT_TYPE_S_VLAN
+};
+
+/**
+ *  enum dpsw_flooding_cfg - flooding configuration requested
+ * @DPSW_FLOODING_PER_VLAN: Flooding replicators are allocated per VLAN and
+ * interfaces present in each of them can be configured using
+ * dpsw_vlan_add_if_flooding()/dpsw_vlan_remove_if_flooding().
+ * This is the default configuration.
+ *
+ * @DPSW_FLOODING_PER_FDB: Flooding replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_flooding_cfg {
+	DPSW_FLOODING_PER_VLAN = 0,
+	DPSW_FLOODING_PER_FDB,
+};
+
+/**
+ * enum dpsw_broadcast_cfg - broadcast configuration requested
+ * @DPSW_BROADCAST_PER_OBJECT: There is only one broadcast replicator per DPSW
+ * object. This is the default configuration.
+ * @DPSW_BROADCAST_PER_FDB: Broadcast replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_broadcast_cfg {
+	DPSW_BROADCAST_PER_OBJECT = 0,
+	DPSW_BROADCAST_PER_FDB,
+};
+
+int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+/* DPSW IRQ Index and Events */
+
+#define DPSW_IRQ_INDEX_IF		0x0000
+#define DPSW_IRQ_INDEX_L2SW		0x0001
+
+/**
+ * DPSW_IRQ_EVENT_LINK_CHANGED - Indicates that the link state changed
+ */
+#define DPSW_IRQ_EVENT_LINK_CHANGED	0x0001
+
+/**
+ * struct dpsw_irq_cfg - IRQ configuration
+ * @addr:	Address that must be written to signal a message-based interrupt
+ * @val:	Value to write into irq_addr address
+ * @irq_num: A user defined number associated with this IRQ
+ */
+struct dpsw_irq_cfg {
+	u64 addr;
+	u32 val;
+	int irq_num;
+};
+
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u8 en);
+
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      u8 irq_index, u32 mask);
+
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u32 *status);
+
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u8 irq_index, u32 status);
+
+/**
+ * struct dpsw_attr - Structure representing DPSW attributes
+ * @id: DPSW object ID
+ * @options: Enable/Disable DPSW features
+ * @max_vlans: Maximum Number of VLANs
+ * @max_meters_per_if:  Number of meters per interface
+ * @max_fdbs: Maximum Number of FDBs
+ * @max_fdb_entries: Number of FDB entries for default FDB table;
+ *			0 - indicates default 1024 entries.
+ * @fdb_aging_time: Default FDB aging time for default FDB table;
+ *			0 - indicates default 300 seconds
+ * @max_fdb_mc_groups: Number of multicast groups in each FDB table;
+ *			0 - indicates default 32
+ * @mem_size: DPSW frame storage memory size
+ * @num_ifs: Number of interfaces
+ * @num_vlans: Current number of VLANs
+ * @num_fdbs: Current number of FDBs
+ * @component_type: Component type of this bridge
+ * @flooding_cfg: Flooding configuration (PER_VLAN - default, PER_FDB)
+ * @broadcast_cfg: Broadcast configuration (PER_OBJECT - default, PER_FDB)
+ */
+struct dpsw_attr {
+	int id;
+	u64 options;
+	u16 max_vlans;
+	u8 max_meters_per_if;
+	u8 max_fdbs;
+	u16 max_fdb_entries;
+	u16 fdb_aging_time;
+	u16 max_fdb_mc_groups;
+	u16 num_ifs;
+	u16 mem_size;
+	u16 num_vlans;
+	u8 num_fdbs;
+	enum dpsw_component_type component_type;
+	enum dpsw_flooding_cfg flooding_cfg;
+	enum dpsw_broadcast_cfg broadcast_cfg;
+};
+
+int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			struct dpsw_attr *attr);
+
+/**
+ * struct dpsw_ctrl_if_attr - Control interface attributes
+ * @rx_fqid:		Receive FQID
+ * @rx_err_fqid:	Receive error FQID
+ * @tx_err_conf_fqid:	Transmit error and confirmation FQID
+ */
+struct dpsw_ctrl_if_attr {
+	u32 rx_fqid;
+	u32 rx_err_fqid;
+	u32 tx_err_conf_fqid;
+};
+
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr);
+
+enum dpsw_queue_type {
+	DPSW_QUEUE_RX,
+	DPSW_QUEUE_TX_ERR_CONF,
+	DPSW_QUEUE_RX_ERR,
+};
+
+#define DPSW_MAX_DPBP     8
+
+/**
+ * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration
+ * @num_dpbp: Number of DPBPs
+ * @pools: Array of buffer pools parameters; The number of valid entries
+ *	must match 'num_dpbp' value
+ * @pools.dpbp_id: DPBP object ID
+ * @pools.buffer_size: Buffer size
+ * @pools.backup_pool: Backup pool
+ */
+struct dpsw_ctrl_if_pools_cfg {
+	u8 num_dpbp;
+	struct {
+		int dpbp_id;
+		u16 buffer_size;
+		int backup_pool;
+	} pools[DPSW_MAX_DPBP];
+};
+
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg);
+
+#define DPSW_CTRL_IF_QUEUE_OPT_USER_CTX		0x00000001
+#define DPSW_CTRL_IF_QUEUE_OPT_DEST		0x00000002
+
+enum dpsw_ctrl_if_dest {
+	DPSW_CTRL_IF_DEST_NONE = 0,
+	DPSW_CTRL_IF_DEST_DPIO = 1,
+};
+
+struct dpsw_ctrl_if_dest_cfg {
+	enum dpsw_ctrl_if_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+struct dpsw_ctrl_if_queue_cfg {
+	u32 options;
+	u64 user_ctx;
+	struct dpsw_ctrl_if_dest_cfg dest_cfg;
+};
+
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg);
+
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+/**
+ * enum dpsw_action - Action selection for special/control frames
+ * @DPSW_ACTION_DROP: Drop frame
+ * @DPSW_ACTION_REDIRECT: Redirect frame to control port
+ */
+enum dpsw_action {
+	DPSW_ACTION_DROP = 0,
+	DPSW_ACTION_REDIRECT = 1
+};
+
+#define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
+#define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
+#define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
+#define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
+
+/**
+ * struct dpsw_link_cfg - Structure representing DPSW link configuration
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
+ */
+struct dpsw_link_cfg {
+	u32 rate;
+	u64 options;
+};
+
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
+			 struct dpsw_link_cfg *cfg);
+
+/**
+ * struct dpsw_link_state - Structure representing DPSW link state
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
+ * @up: 0 - covers two cases: down and disconnected, 1 - up
+ */
+struct dpsw_link_state {
+	u32 rate;
+	u64 options;
+	u8 up;
+};
+
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_link_state *state);
+
+/**
+ * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
+ * @pcp: Priority Code Point (PCP): a 3-bit field which refers
+ *		 to the IEEE 802.1p priority
+ * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used
+ *		 separately or in conjunction with PCP to indicate frames
+ *		 eligible to be dropped in the presence of congestion
+ * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN
+ *			to which the frame belongs. The hexadecimal values
+ *			of 0x000 and 0xFFF are reserved;
+ *			all other values may be used as VLAN identifiers,
+ *			allowing up to 4,094 VLANs
+ */
+struct dpsw_tci_cfg {
+	u8 pcp;
+	u8 dei;
+	u16 vlan_id;
+};
+
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
+		    const struct dpsw_tci_cfg *cfg);
+
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
+		    struct dpsw_tci_cfg *cfg);
+
+/**
+ * enum dpsw_stp_state - Spanning Tree Protocol (STP) states
+ * @DPSW_STP_STATE_DISABLED: Disabled state
+ * @DPSW_STP_STATE_LISTENING: Listening state
+ * @DPSW_STP_STATE_LEARNING: Learning state
+ * @DPSW_STP_STATE_FORWARDING: Forwarding state
+ * @DPSW_STP_STATE_BLOCKING: Blocking state
+ *
+ */
+enum dpsw_stp_state {
+	DPSW_STP_STATE_DISABLED = 0,
+	DPSW_STP_STATE_LISTENING = 1,
+	DPSW_STP_STATE_LEARNING = 2,
+	DPSW_STP_STATE_FORWARDING = 3,
+	DPSW_STP_STATE_BLOCKING = 0
+};
+
+/**
+ * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration
+ * @vlan_id: VLAN ID STP state
+ * @state: STP state
+ */
+struct dpsw_stp_cfg {
+	u16 vlan_id;
+	enum dpsw_stp_state state;
+};
+
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
+		    const struct dpsw_stp_cfg *cfg);
+
+/**
+ * enum dpsw_accepted_frames - Types of frames to accept
+ * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and
+ *			priority tagged frames
+ * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or
+ *			Priority-Tagged frames received on this interface.
+ *
+ */
+enum dpsw_accepted_frames {
+	DPSW_ADMIT_ALL = 1,
+	DPSW_ADMIT_ONLY_VLAN_TAGGED = 3
+};
+
+/**
+ * enum dpsw_counter  - Counters types
+ * @DPSW_CNT_ING_FRAME: Counts ingress frames
+ * @DPSW_CNT_ING_BYTE: Counts ingress bytes
+ * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames
+ * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frame
+ * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames
+ * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes
+ * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames
+ * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes
+ * @DPSW_CNT_EGR_FRAME: Counts egress frames
+ * @DPSW_CNT_EGR_BYTE: Counts egress bytes
+ * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames
+ * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames
+ * @DPSW_CNT_ING_NO_BUFF_DISCARD: Counts ingress no buffer discarded frames
+ */
+enum dpsw_counter {
+	DPSW_CNT_ING_FRAME = 0x0,
+	DPSW_CNT_ING_BYTE = 0x1,
+	DPSW_CNT_ING_FLTR_FRAME = 0x2,
+	DPSW_CNT_ING_FRAME_DISCARD = 0x3,
+	DPSW_CNT_ING_MCAST_FRAME = 0x4,
+	DPSW_CNT_ING_MCAST_BYTE = 0x5,
+	DPSW_CNT_ING_BCAST_FRAME = 0x6,
+	DPSW_CNT_ING_BCAST_BYTES = 0x7,
+	DPSW_CNT_EGR_FRAME = 0x8,
+	DPSW_CNT_EGR_BYTE = 0x9,
+	DPSW_CNT_EGR_FRAME_DISCARD = 0xa,
+	DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb,
+	DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc,
+};
+
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 if_id, enum dpsw_counter type, u64 *counter);
+
+int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id);
+
+int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id);
+
+/**
+ * struct dpsw_if_attr - Structure representing DPSW interface attributes
+ * @num_tcs: Number of traffic classes
+ * @rate: Transmit rate in bits per second
+ * @options: Interface configuration options (bitmap)
+ * @enabled: Indicates if interface is enabled
+ * @accept_all_vlan: The device discards/accepts incoming frames
+ *		for VLANs that do not include this interface
+ * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device
+ *		discards untagged frames or priority-tagged frames received on
+ *		this interface;
+ *		When set to 'DPSW_ADMIT_ALL', untagged frames or priority-
+ *		tagged frames received on this interface are accepted
+ * @qdid: control frames transmit qdid
+ */
+struct dpsw_if_attr {
+	u8 num_tcs;
+	u32 rate;
+	u32 options;
+	int enabled;
+	int accept_all_vlan;
+	enum dpsw_accepted_frames admit_untagged;
+	u16 qdid;
+};
+
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr);
+
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 if_id, u16 frame_length);
+
+/**
+ * struct dpsw_vlan_cfg - VLAN Configuration
+ * @fdb_id: Forwarding Data Base
+ */
+struct dpsw_vlan_cfg {
+	u16 fdb_id;
+};
+
+int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		  u16 vlan_id, const struct dpsw_vlan_cfg *cfg);
+
+#define DPSW_VLAN_ADD_IF_OPT_FDB_ID            0x0001
+
+/**
+ * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces
+ * @num_ifs: The number of interfaces that are assigned to the egress
+ *		list for this VLAN
+ * @if_id: The set of interfaces that are
+ *		assigned to the egress list for this VLAN
+ * @options: Options map for this command (DPSW_VLAN_ADD_IF_OPT_FDB_ID)
+ * @fdb_id: FDB id to be used by this VLAN on these specific interfaces
+ *		(taken into account only if the DPSW_VLAN_ADD_IF_OPT_FDB_ID is
+ *		specified in the options field)
+ */
+struct dpsw_vlan_if_cfg {
+	u16 num_ifs;
+	u16 options;
+	u16 if_id[DPSW_MAX_IF];
+	u16 fdb_id;
+};
+
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		     u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		     u16 vlan_id);
+
+/**
+ * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic
+ * @DPSW_FDB_ENTRY_STATIC: Static entry
+ * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry
+ */
+enum dpsw_fdb_entry_type {
+	DPSW_FDB_ENTRY_STATIC = 0,
+	DPSW_FDB_ENTRY_DINAMIC = 1
+};
+
+/**
+ * struct dpsw_fdb_unicast_cfg - Unicast entry configuration
+ * @type: Select static or dynamic entry
+ * @mac_addr: MAC address
+ * @if_egress: Egress interface ID
+ */
+struct dpsw_fdb_unicast_cfg {
+	enum dpsw_fdb_entry_type type;
+	u8 mac_addr[6];
+	u16 if_egress;
+};
+
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			 u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg);
+
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			    u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg);
+
+#define DPSW_FDB_ENTRY_TYPE_DYNAMIC  BIT(0)
+#define DPSW_FDB_ENTRY_TYPE_UNICAST  BIT(1)
+
+/**
+ * struct fdb_dump_entry - fdb snapshot entry
+ * @mac_addr: MAC address
+ * @type: bit0 - DINAMIC(1)/STATIC(0), bit1 - UNICAST(1)/MULTICAST(0)
+ * @if_info: unicast - egress interface, multicast - number of egress interfaces
+ * @if_mask: multicast - egress interface mask
+ */
+struct fdb_dump_entry {
+	u8 mac_addr[6];
+	u8 type;
+	u8 if_info;
+	u8 if_mask[8];
+};
+
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id,
+		  u64 iova_addr, u32 iova_size, u16 *num_entries);
+
+/**
+ * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration
+ * @type: Select static or dynamic entry
+ * @mac_addr: MAC address
+ * @num_ifs: Number of external and internal interfaces
+ * @if_id: Egress interface IDs
+ */
+struct dpsw_fdb_multicast_cfg {
+	enum dpsw_fdb_entry_type type;
+	u8 mac_addr[6];
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
+
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
+
+/**
+ * enum dpsw_learning_mode - Auto-learning modes
+ * @DPSW_LEARNING_MODE_DIS: Disable Auto-learning
+ * @DPSW_LEARNING_MODE_HW: Enable HW auto-Learning
+ * @DPSW_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
+ * @DPSW_LEARNING_MODE_SECURE: Enable secure learning by CPU
+ *
+ *	NONE - SECURE LEARNING
+ *	SMAC found	DMAC found	CTLU Action
+ *	v		v	Forward frame to
+ *						1.  DMAC destination
+ *	-		v	Forward frame to
+ *						1.  DMAC destination
+ *						2.  Control interface
+ *	v		-	Forward frame to
+ *						1.  Flooding list of interfaces
+ *	-		-	Forward frame to
+ *						1.  Flooding list of interfaces
+ *						2.  Control interface
+ *	SECURE LEARING
+ *	SMAC found	DMAC found	CTLU Action
+ *	v		v		Forward frame to
+ *						1.  DMAC destination
+ *	-		v		Forward frame to
+ *						1.  Control interface
+ *	v		-		Forward frame to
+ *						1.  Flooding list of interfaces
+ *	-		-		Forward frame to
+ *						1.  Control interface
+ */
+enum dpsw_learning_mode {
+	DPSW_LEARNING_MODE_DIS = 0,
+	DPSW_LEARNING_MODE_HW = 1,
+	DPSW_LEARNING_MODE_NON_SECURE = 2,
+	DPSW_LEARNING_MODE_SECURE = 3
+};
+
+/**
+ * struct dpsw_fdb_attr - FDB Attributes
+ * @max_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ * @learning_mode: Learning mode
+ * @num_fdb_mc_groups: Current number of multicast groups
+ * @max_fdb_mc_groups: Maximum number of multicast groups
+ */
+struct dpsw_fdb_attr {
+	u16 max_fdb_entries;
+	u16 fdb_ageing_time;
+	enum dpsw_learning_mode learning_mode;
+	u16 num_fdb_mc_groups;
+	u16 max_fdb_mc_groups;
+};
+
+int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			 u16 *major_ver, u16 *minor_ver);
+
+int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, u8 mac_addr[6]);
+
+/**
+ * struct dpsw_fdb_cfg  - FDB Configuration
+ * @num_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ */
+struct dpsw_fdb_cfg {
+	u16 num_fdb_entries;
+	u16 fdb_ageing_time;
+};
+
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg);
+
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id);
+
+/**
+ * enum dpsw_flood_type - Define the flood type of a DPSW object
+ * @DPSW_BROADCAST: Broadcast flooding
+ * @DPSW_FLOODING: Unknown flooding
+ */
+enum dpsw_flood_type {
+	DPSW_BROADCAST = 0,
+	DPSW_FLOODING,
+};
+
+struct dpsw_egress_flood_cfg {
+	u16 fdb_id;
+	enum dpsw_flood_type flood_type;
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg);
+
+int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, enum dpsw_learning_mode mode);
+
+#endif /* __FSL_DPSW_H */
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 09471329f3a3..5a54976e6a28 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -5,6 +5,7 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/vmalloc.h>
+#include <net/pkt_sched.h>
 
 /* ENETC overhead: optional extension BD + 1 BD gap */
 #define ENETC_TXBDS_NEEDED(val)	((val) + 2)
@@ -12,44 +13,6 @@
 #define ENETC_MAX_SKB_FRAGS	13
 #define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
 
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
-			      int active_offloads);
-
-netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct enetc_ndev_priv *priv = netdev_priv(ndev);
-	struct enetc_bdr *tx_ring;
-	int count;
-
-	tx_ring = priv->tx_ring[skb->queue_mapping];
-
-	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
-		if (unlikely(skb_linearize(skb)))
-			goto drop_packet_err;
-
-	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
-	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
-		netif_stop_subqueue(ndev, tx_ring->index);
-		return NETDEV_TX_BUSY;
-	}
-
-	enetc_lock_mdio();
-	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
-	enetc_unlock_mdio();
-
-	if (unlikely(!count))
-		goto drop_packet_err;
-
-	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED)
-		netif_stop_subqueue(ndev, tx_ring->index);
-
-	return NETDEV_TX_OK;
-
-drop_packet_err:
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
-}
-
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
 				struct enetc_tx_swbd *tx_swbd)
 {
@@ -220,6 +183,41 @@ dma_err:
 	return 0;
 }
 
+netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_bdr *tx_ring;
+	int count;
+
+	tx_ring = priv->tx_ring[skb->queue_mapping];
+
+	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
+		if (unlikely(skb_linearize(skb)))
+			goto drop_packet_err;
+
+	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
+		netif_stop_subqueue(ndev, tx_ring->index);
+		return NETDEV_TX_BUSY;
+	}
+
+	enetc_lock_mdio();
+	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+	enetc_unlock_mdio();
+
+	if (unlikely(!count))
+		goto drop_packet_err;
+
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED)
+		netif_stop_subqueue(ndev, tx_ring->index);
+
+	return NETDEV_TX_OK;
+
+drop_packet_err:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
 static irqreturn_t enetc_msix(int irq, void *data)
 {
 	struct enetc_int_vector	*v = data;
@@ -241,10 +239,6 @@ static irqreturn_t enetc_msix(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget);
-static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
-			       struct napi_struct *napi, int work_limit);
-
 static void enetc_rx_dim_work(struct work_struct *w)
 {
 	struct dim *dim = container_of(w, struct dim, work);
@@ -273,50 +267,6 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v)
 	net_dim(&v->rx_dim, dim_sample);
 }
 
-static int enetc_poll(struct napi_struct *napi, int budget)
-{
-	struct enetc_int_vector
-		*v = container_of(napi, struct enetc_int_vector, napi);
-	bool complete = true;
-	int work_done;
-	int i;
-
-	enetc_lock_mdio();
-
-	for (i = 0; i < v->count_tx_rings; i++)
-		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
-			complete = false;
-
-	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
-	if (work_done == budget)
-		complete = false;
-	if (work_done)
-		v->rx_napi_work = true;
-
-	if (!complete) {
-		enetc_unlock_mdio();
-		return budget;
-	}
-
-	napi_complete_done(napi, work_done);
-
-	if (likely(v->rx_dim_en))
-		enetc_rx_net_dim(v);
-
-	v->rx_napi_work = false;
-
-	/* enable interrupts */
-	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
-
-	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
-				 ENETC_TBIER_TXTIE);
-
-	enetc_unlock_mdio();
-
-	return work_done;
-}
-
 static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 {
 	int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
@@ -344,12 +294,7 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
 	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
 		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
-		/* Ensure skb_mstamp_ns, which might have been populated with
-		 * the txtime, is not mistaken for a software timestamp,
-		 * because this will prevent the dispatch of our hardware
-		 * timestamp to the socket.
-		 */
-		skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(skb);
 		skb_tstamp_tx(skb, &shhwtstamps);
 	}
 }
@@ -483,18 +428,16 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 		/* clear 'R" as well */
 		rxbd->r.lstatus = 0;
 
-		rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-		rx_swbd++;
-		i++;
-		if (unlikely(i == rx_ring->bd_count)) {
-			i = 0;
-			rx_swbd = rx_ring->rx_swbd;
-		}
+		enetc_rxbd_next(rx_ring, &rxbd, &i);
+		rx_swbd = &rx_ring->rx_swbd[i];
 	}
 
 	if (likely(j)) {
 		rx_ring->next_to_alloc = i; /* keep track from page reuse */
 		rx_ring->next_to_use = i;
+
+		/* update ENETC's consumer index */
+		enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
 	}
 
 	return j;
@@ -680,13 +623,9 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 		u32 bd_status;
 		u16 size;
 
-		if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
-			int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
-
-			/* update ENETC's consumer index */
-			enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
-			cleaned_cnt -= count;
-		}
+		if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
+			cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
+							    cleaned_cnt);
 
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
@@ -704,9 +643,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		cleaned_cnt++;
 
-		rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-		if (unlikely(++i == rx_ring->bd_count))
-			i = 0;
+		enetc_rxbd_next(rx_ring, &rxbd, &i);
 
 		if (unlikely(bd_status &
 			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
@@ -715,9 +652,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 				dma_rmb();
 				bd_status = le32_to_cpu(rxbd->r.lstatus);
 
-				rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-				if (unlikely(++i == rx_ring->bd_count))
-					i = 0;
+				enetc_rxbd_next(rx_ring, &rxbd, &i);
 			}
 
 			rx_ring->ndev->stats.rx_dropped++;
@@ -740,9 +675,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 			cleaned_cnt++;
 
-			rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-			if (unlikely(++i == rx_ring->bd_count))
-				i = 0;
+			enetc_rxbd_next(rx_ring, &rxbd, &i);
 		}
 
 		rx_byte_cnt += skb->len;
@@ -762,6 +695,50 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 	return rx_frm_cnt;
 }
 
+static int enetc_poll(struct napi_struct *napi, int budget)
+{
+	struct enetc_int_vector
+		*v = container_of(napi, struct enetc_int_vector, napi);
+	bool complete = true;
+	int work_done;
+	int i;
+
+	enetc_lock_mdio();
+
+	for (i = 0; i < v->count_tx_rings; i++)
+		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
+			complete = false;
+
+	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
+	if (work_done == budget)
+		complete = false;
+	if (work_done)
+		v->rx_napi_work = true;
+
+	if (!complete) {
+		enetc_unlock_mdio();
+		return budget;
+	}
+
+	napi_complete_done(napi, work_done);
+
+	if (likely(v->rx_dim_en))
+		enetc_rx_net_dim(v);
+
+	v->rx_napi_work = false;
+
+	/* enable interrupts */
+	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
+
+	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
+		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
+				 ENETC_TBIER_TXTIE);
+
+	enetc_unlock_mdio();
+
+	return work_done;
+}
+
 /* Probing and Init */
 #define ENETC_MAX_RFS_SIZE 64
 void enetc_get_si_caps(struct enetc_si *si)
@@ -995,60 +972,6 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv)
 		enetc_free_tx_ring(priv->tx_ring[i]);
 }
 
-int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
-{
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
-
-	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
-					   GFP_KERNEL);
-	if (!cbdr->bd_base)
-		return -ENOMEM;
-
-	/* h/w requires 128B alignment */
-	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
-		dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
-		return -EINVAL;
-	}
-
-	cbdr->next_to_clean = 0;
-	cbdr->next_to_use = 0;
-
-	return 0;
-}
-
-void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
-{
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
-
-	dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
-	cbdr->bd_base = NULL;
-}
-
-void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
-{
-	/* set CBDR cache attributes */
-	enetc_wr(hw, ENETC_SICAR2,
-		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
-
-	enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base));
-	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
-	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
-
-	enetc_wr(hw, ENETC_SICBDRPIR, 0);
-	enetc_wr(hw, ENETC_SICBDRCIR, 0);
-
-	/* enable ring */
-	enetc_wr(hw, ENETC_SICBDRMR, BIT(31));
-
-	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
-	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
-}
-
-void enetc_clear_cbdr(struct enetc_hw *hw)
-{
-	enetc_wr(hw, ENETC_SICBDRMR, 0);
-}
-
 static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
 {
 	int *rss_table;
@@ -1108,45 +1031,22 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
 	priv->bdr_int_num = cpus;
 	priv->ic_mode = ENETC_IC_RX_ADAPTIVE | ENETC_IC_TX_MANUAL;
 	priv->tx_ictt = ENETC_TXIC_TIMETHR;
-
-	/* SI specific */
-	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
 }
 
 int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
-	int err;
-
-	err = enetc_alloc_cbdr(priv->dev, &si->cbd_ring);
-	if (err)
-		return err;
-
-	enetc_setup_cbdr(&si->hw, &si->cbd_ring);
 
 	priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
 				  GFP_KERNEL);
-	if (!priv->cls_rules) {
-		err = -ENOMEM;
-		goto err_alloc_cls;
-	}
+	if (!priv->cls_rules)
+		return -ENOMEM;
 
 	return 0;
-
-err_alloc_cls:
-	enetc_clear_cbdr(&si->hw);
-	enetc_free_cbdr(priv->dev, &si->cbd_ring);
-
-	return err;
 }
 
 void enetc_free_si_resources(struct enetc_ndev_priv *priv)
 {
-	struct enetc_si *si = priv->si;
-
-	enetc_clear_cbdr(&si->hw);
-	enetc_free_cbdr(priv->dev, &si->cbd_ring);
-
 	kfree(priv->cls_rules);
 }
 
@@ -1217,9 +1117,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	rx_ring->rcir = hw->reg + ENETC_BDR(RX, idx, ENETC_RBCIR);
 	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
+	enetc_lock_mdio();
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
-	/* update ENETC's consumer index */
-	enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use);
+	enetc_unlock_mdio();
 
 	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 8b380fc13314..773e412b9f4e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -98,12 +98,14 @@ struct enetc_cbdr {
 	void *bd_base; /* points to Rx or Tx BD ring */
 	void __iomem *pir;
 	void __iomem *cir;
+	void __iomem *mr; /* mode register */
 
 	int bd_count; /* # of BDs */
 	int next_to_use;
 	int next_to_clean;
 
 	dma_addr_t bd_dma_base;
+	struct device *dma_dev;
 };
 
 #define ENETC_TXBD(BDR, i) (&(((union enetc_tx_bd *)((BDR).bd_base))[i]))
@@ -119,19 +121,26 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i)
 	return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]);
 }
 
-static inline union enetc_rx_bd *enetc_rxbd_next(struct enetc_bdr *rx_ring,
-						 union enetc_rx_bd *rxbd,
-						 int i)
+static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring,
+				   union enetc_rx_bd **old_rxbd, int *old_index)
 {
-	rxbd++;
+	union enetc_rx_bd *new_rxbd = *old_rxbd;
+	int new_index = *old_index;
+
+	new_rxbd++;
+
 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
 	if (rx_ring->ext_en)
-		rxbd++;
+		new_rxbd++;
 #endif
-	if (unlikely(++i == rx_ring->bd_count))
-		rxbd = rx_ring->bd_base;
 
-	return rxbd;
+	if (unlikely(++new_index == rx_ring->bd_count)) {
+		new_rxbd = rx_ring->bd_base;
+		new_index = 0;
+	}
+
+	*old_rxbd = new_rxbd;
+	*old_index = new_index;
 }
 
 static inline union enetc_rx_bd *enetc_rxbd_ext(union enetc_rx_bd *rxbd)
@@ -252,7 +261,7 @@ struct enetc_ndev_priv {
 	u16 rx_bd_count, tx_bd_count;
 
 	u16 msg_enable;
-	int active_offloads;
+	enum enetc_active_offloads active_offloads;
 
 	u32 speed; /* store speed for compare update pspeed */
 
@@ -310,10 +319,9 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
-int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
-void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
-void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr);
-void enetc_clear_cbdr(struct enetc_hw *hw);
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count,
+		     struct enetc_cbdr *cbdr);
+void enetc_teardown_cbdr(struct enetc_cbdr *cbdr);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 			    char *mac_addr, int si_map);
 int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 201cbc362e33..073e56dcca4e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -3,9 +3,63 @@
 
 #include "enetc.h"
 
-static void enetc_clean_cbdr(struct enetc_si *si)
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count,
+		     struct enetc_cbdr *cbdr)
+{
+	int size = bd_count * sizeof(struct enetc_cbd);
+
+	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
+					   GFP_KERNEL);
+	if (!cbdr->bd_base)
+		return -ENOMEM;
+
+	/* h/w requires 128B alignment */
+	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
+		dma_free_coherent(dev, size, cbdr->bd_base,
+				  cbdr->bd_dma_base);
+		return -EINVAL;
+	}
+
+	cbdr->next_to_clean = 0;
+	cbdr->next_to_use = 0;
+	cbdr->dma_dev = dev;
+	cbdr->bd_count = bd_count;
+
+	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
+	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+	cbdr->mr = hw->reg + ENETC_SICBDRMR;
+
+	/* set CBDR cache attributes */
+	enetc_wr(hw, ENETC_SICAR2,
+		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
+
+	enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
+
+	enetc_wr_reg(cbdr->pir, cbdr->next_to_clean);
+	enetc_wr_reg(cbdr->cir, cbdr->next_to_use);
+	/* enable ring */
+	enetc_wr_reg(cbdr->mr, BIT(31));
+
+	return 0;
+}
+
+void enetc_teardown_cbdr(struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	/* disable ring */
+	enetc_wr_reg(cbdr->mr, 0);
+
+	dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base,
+			  cbdr->bd_dma_base);
+	cbdr->bd_base = NULL;
+	cbdr->dma_dev = NULL;
+}
+
+static void enetc_clean_cbdr(struct enetc_cbdr *ring)
 {
-	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd *dest_cbd;
 	int i, status;
 
@@ -15,7 +69,7 @@ static void enetc_clean_cbdr(struct enetc_si *si)
 		dest_cbd = ENETC_CBD(*ring, i);
 		status = dest_cbd->status_flags & ENETC_CBD_STATUS_MASK;
 		if (status)
-			dev_warn(&si->pdev->dev, "CMD err %04x for cmd %04x\n",
+			dev_warn(ring->dma_dev, "CMD err %04x for cmd %04x\n",
 				 status, dest_cbd->cmd);
 
 		memset(dest_cbd, 0, sizeof(*dest_cbd));
@@ -43,7 +97,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd)
 		return -EIO;
 
 	if (unlikely(!enetc_cbd_unused(ring)))
-		enetc_clean_cbdr(si);
+		enetc_clean_cbdr(ring);
 
 	i = ring->next_to_use;
 	dest_cbd = ENETC_CBD(*ring, i);
@@ -69,7 +123,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd)
 	/* CBD may writeback data, feedback up level */
 	*cbd = *dest_cbd;
 
-	enetc_clean_cbdr(si);
+	enetc_clean_cbdr(ring);
 
 	return 0;
 }
@@ -117,6 +171,7 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 		       int index)
 {
+	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd cbd = {.cmd = 0};
 	dma_addr_t dma, dma_align;
 	void *tmp, *tmp_align;
@@ -129,10 +184,10 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 	cbd.length = cpu_to_le16(sizeof(*rfse));
 	cbd.opt[3] = cpu_to_le32(0); /* SI */
 
-	tmp = dma_alloc_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+	tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
 				 &dma, GFP_KERNEL);
 	if (!tmp) {
-		dev_err(&si->pdev->dev, "DMA mapping of RFS entry failed!\n");
+		dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n");
 		return -ENOMEM;
 	}
 
@@ -145,9 +200,9 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 
 	err = enetc_send_cmd(si, &cbd);
 	if (err)
-		dev_err(&si->pdev->dev, "FS entry add failed (%d)!", err);
+		dev_err(ring->dma_dev, "FS entry add failed (%d)!", err);
 
-	dma_free_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+	dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
 			  tmp, dma);
 
 	return err;
@@ -157,6 +212,7 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 			       bool read)
 {
+	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd cbd = {.cmd = 0};
 	dma_addr_t dma, dma_align;
 	u8 *tmp, *tmp_align;
@@ -166,10 +222,10 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 		/* HW only takes in a full 64 entry table */
 		return -EINVAL;
 
-	tmp = dma_alloc_coherent(&si->pdev->dev, count + RSSE_ALIGN,
+	tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN,
 				 &dma, GFP_KERNEL);
 	if (!tmp) {
-		dev_err(&si->pdev->dev, "DMA mapping of RSS table failed!\n");
+		dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n");
 		return -ENOMEM;
 	}
 	dma_align = ALIGN(dma, RSSE_ALIGN);
@@ -189,13 +245,13 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 
 	err = enetc_send_cmd(si, &cbd);
 	if (err)
-		dev_err(&si->pdev->dev, "RSS cmd failed (%d)!", err);
+		dev_err(ring->dma_dev, "RSS cmd failed (%d)!", err);
 
 	if (read)
 		for (i = 0; i < count; i++)
 			table[i] = tmp_align[i];
 
-	dma_free_coherent(&si->pdev->dev, count + RSSE_ALIGN, tmp, dma);
+	dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 224fc37a6757..5e95afd61c87 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -129,16 +129,20 @@ static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type)
 }
 
 static void enetc_set_mac_ht_flt(struct enetc_si *si, int si_idx, int type,
-				 u32 *hash)
+				 unsigned long hash)
 {
 	bool err = si->errata & ENETC_ERR_UCMCSWP;
 
 	if (type == UC) {
-		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), *hash);
-		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), *(hash + 1));
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err),
+			      lower_32_bits(hash));
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx),
+			      upper_32_bits(hash));
 	} else { /* MC */
-		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), *hash);
-		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), *(hash + 1));
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err),
+			      lower_32_bits(hash));
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx),
+			      upper_32_bits(hash));
 	}
 }
 
@@ -182,7 +186,7 @@ static void enetc_sync_mac_filters(struct enetc_pf *pf)
 		if (i == UC)
 			enetc_clear_mac_flt_entry(si, pos);
 
-		enetc_set_mac_ht_flt(si, 0, i, (u32 *)f->mac_hash_table);
+		enetc_set_mac_ht_flt(si, 0, i, *f->mac_hash_table);
 	}
 }
 
@@ -248,10 +252,10 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 }
 
 static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx,
-				     u32 *hash)
+				     unsigned long hash)
 {
-	enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), *hash);
-	enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), *(hash + 1));
+	enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), lower_32_bits(hash));
+	enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), upper_32_bits(hash));
 }
 
 static int enetc_vid_hash_idx(unsigned int vid)
@@ -279,7 +283,7 @@ static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash)
 		}
 	}
 
-	enetc_set_vlan_ht_filter(&pf->si->hw, 0, (u32 *)pf->vlan_ht_filter);
+	enetc_set_vlan_ht_filter(&pf->si->hw, 0, *pf->vlan_ht_filter);
 }
 
 static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid)
@@ -1081,26 +1085,6 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 	return err;
 }
 
-static void enetc_init_unused_port(struct enetc_si *si)
-{
-	struct device *dev = &si->pdev->dev;
-	struct enetc_hw *hw = &si->hw;
-	int err;
-
-	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
-	err = enetc_alloc_cbdr(dev, &si->cbd_ring);
-	if (err)
-		return;
-
-	enetc_setup_cbdr(hw, &si->cbd_ring);
-
-	enetc_init_port_rfs_memory(si);
-	enetc_init_port_rss_memory(si);
-
-	enetc_clear_cbdr(hw);
-	enetc_free_cbdr(dev, &si->cbd_ring);
-}
-
 static int enetc_pf_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent)
 {
@@ -1124,8 +1108,24 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_map_pf_space;
 	}
 
+	err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+			       &si->cbd_ring);
+	if (err)
+		goto err_setup_cbdr;
+
+	err = enetc_init_port_rfs_memory(si);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
+		goto err_init_port_rfs;
+	}
+
+	err = enetc_init_port_rss_memory(si);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
+		goto err_init_port_rss;
+	}
+
 	if (node && !of_device_is_available(node)) {
-		enetc_init_unused_port(si);
 		dev_info(&pdev->dev, "device is disabled, skipping\n");
 		err = -ENODEV;
 		goto err_device_disabled;
@@ -1158,18 +1158,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_alloc_si_res;
 	}
 
-	err = enetc_init_port_rfs_memory(si);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
-		goto err_init_port_rfs;
-	}
-
-	err = enetc_init_port_rss_memory(si);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
-		goto err_init_port_rss;
-	}
-
 	err = enetc_configure_si(priv);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to configure SI\n");
@@ -1205,15 +1193,17 @@ err_phylink_create:
 err_mdiobus_create:
 	enetc_free_msix(priv);
 err_config_si:
-err_init_port_rss:
-err_init_port_rfs:
 err_alloc_msix:
 	enetc_free_si_resources(priv);
 err_alloc_si_res:
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
+err_init_port_rss:
+err_init_port_rfs:
 err_device_disabled:
+	enetc_teardown_cbdr(&si->cbd_ring);
+err_setup_cbdr:
 err_map_pf_space:
 	enetc_pci_remove(pdev);
 
@@ -1239,6 +1229,7 @@ static void enetc_pf_remove(struct pci_dev *pdev)
 	enetc_free_msix(priv);
 
 	enetc_free_si_resources(priv);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	free_netdev(si->ndev);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index a9aee219fb58..cb7fa4bceaf2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -1221,6 +1221,11 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
 
 	/* Flow meter and max frame size */
 	if (entryp) {
+		if (entryp->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+			err = -EOPNOTSUPP;
+			goto free_sfi;
+		}
 		if (entryp->police.burst) {
 			fmi = kzalloc(sizeof(*fmi), GFP_KERNEL);
 			if (!fmi) {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 9b755a84c2d6..03090ba7e226 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -165,6 +165,11 @@ static int enetc_vf_probe(struct pci_dev *pdev,
 
 	enetc_init_si_rings_params(priv);
 
+	err = enetc_setup_cbdr(priv->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+			       &si->cbd_ring);
+	if (err)
+		goto err_setup_cbdr;
+
 	err = enetc_alloc_si_resources(priv);
 	if (err) {
 		dev_err(&pdev->dev, "SI resource alloc failed\n");
@@ -197,6 +202,8 @@ err_config_si:
 err_alloc_msix:
 	enetc_free_si_resources(priv);
 err_alloc_si_res:
+	enetc_teardown_cbdr(&si->cbd_ring);
+err_setup_cbdr:
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
@@ -216,6 +223,7 @@ static void enetc_vf_remove(struct pci_dev *pdev)
 	enetc_free_msix(priv);
 
 	enetc_free_si_resources(priv);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	free_netdev(si->ndev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 6ab9458302e1..2b7db1c22321 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -414,10 +414,6 @@ enum hnae_media_type {
  *   get ring bd number limit
  * get_pauseparam()
  *   get tx and rx of pause frame use
- * set_autoneg()
- *   set auto autonegotiation of pause frame use
- * get_autoneg()
- *   get auto autonegotiation of pause frame use
  * set_pauseparam()
  *   set tx and rx of pause frame use
  * get_coalesce_usecs()
@@ -487,8 +483,6 @@ struct hnae_ae_ops {
 				     u32 *uplimit);
 	void (*get_pauseparam)(struct hnae_handle *handle,
 			       u32 *auto_neg, u32 *rx_en, u32 *tx_en);
-	int (*set_autoneg)(struct hnae_handle *handle, u8 enable);
-	int (*get_autoneg)(struct hnae_handle *handle);
 	int (*set_pauseparam)(struct hnae_handle *handle,
 			      u32 auto_neg, u32 rx_en, u32 tx_en);
 	void (*get_coalesce_usecs)(struct hnae_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index b98244f75ab9..c615fbf9094e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -487,13 +487,6 @@ static void hns_ae_get_pauseparam(struct hnae_handle *handle,
 		hns_dsaf_get_rx_mac_pause_en(dsaf_dev, mac_cb->mac_id, rx_en);
 }
 
-static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable)
-{
-	assert(handle);
-
-	return hns_mac_set_autoneg(hns_get_mac_cb(handle), enable);
-}
-
 static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 {
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
@@ -502,17 +495,6 @@ static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 	hns_mac_set_promisc(mac_cb, (u8)!!en);
 }
 
-static int hns_ae_get_autoneg(struct hnae_handle *handle)
-{
-	u32     auto_neg;
-
-	assert(handle);
-
-	hns_mac_get_autoneg(hns_get_mac_cb(handle), &auto_neg);
-
-	return auto_neg;
-}
-
 static int hns_ae_set_pauseparam(struct hnae_handle *handle,
 				 u32 autoneg, u32 rx_en, u32 tx_en)
 {
@@ -648,7 +630,7 @@ static void hns_ae_update_stats(struct hnae_handle *handle,
 	struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
 	u64 tx_bytes = 0, rx_bytes = 0, tx_packets = 0, rx_packets = 0;
 	u64 rx_errors = 0, tx_errors = 0, tx_dropped = 0;
-	u64 rx_missed_errors = 0;
+	u64 rx_missed_errors;
 
 	dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
 	if (!dsaf_dev)
@@ -965,8 +947,6 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.set_loopback = hns_ae_config_loopback,
 	.get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit,
 	.get_pauseparam = hns_ae_get_pauseparam,
-	.set_autoneg = hns_ae_set_autoneg,
-	.get_autoneg = hns_ae_get_autoneg,
 	.set_pauseparam = hns_ae_set_pauseparam,
 	.get_coalesce_usecs = hns_ae_get_coalesce_usecs,
 	.get_max_coalesced_frames = hns_ae_get_max_coalesced_frames,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 7fb7a419607d..f387a859a201 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -130,14 +130,6 @@ static void hns_gmac_get_tx_auto_pause_frames(void *mac_drv, u16 *newval)
 				     GMAC_FC_TX_TIMER_M, GMAC_FC_TX_TIMER_S);
 }
 
-static void hns_gmac_set_rx_auto_pause_frames(void *mac_drv, u32 newval)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, GMAC_PAUSE_EN_REG,
-			 GMAC_PAUSE_EN_RX_FDFC_B, !!newval);
-}
-
 static void hns_gmac_config_max_frame_length(void *mac_drv, u16 newval)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
@@ -179,14 +171,6 @@ static void hns_gmac_tx_loop_pkt_dis(void *mac_drv)
 	dsaf_write_dev(drv, GMAC_TX_LOOP_PKT_PRI_REG, tx_loop_pkt_pri);
 }
 
-static void hns_gmac_set_duplex_type(void *mac_drv, u8 newval)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, GMAC_DUPLEX_TYPE_REG,
-			 GMAC_DUPLEX_TYPE_B, !!newval);
-}
-
 static void hns_gmac_get_duplex_type(void *mac_drv,
 				     enum hns_gmac_duplex_mdoe *duplex_mode)
 {
@@ -687,17 +671,14 @@ static void hns_gmac_get_stats(void *mac_drv, u64 *data)
 
 static void hns_gmac_get_strings(u32 stringset, u8 *data)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 	u32 i;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) {
-		snprintf(buff, ETH_GSTRING_LEN, "%s",
-			 g_gmac_stats_string[i].desc);
-		buff = buff + ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++)
+		ethtool_sprintf(&buff, g_gmac_stats_string[i].desc);
 }
 
 static int hns_gmac_get_sset_count(int stringset)
@@ -741,8 +722,6 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->set_an_mode = hns_gmac_config_an_mode;
 	mac_drv->config_loopback = hns_gmac_config_loopback;
 	mac_drv->config_pad_and_crc = hns_gmac_config_pad_and_crc;
-	mac_drv->config_half_duplex = hns_gmac_set_duplex_type;
-	mac_drv->set_rx_ignore_pause_frames = hns_gmac_set_rx_auto_pause_frames;
 	mac_drv->get_info = hns_gmac_get_info;
 	mac_drv->autoneg_stat = hns_gmac_autoneg_stat;
 	mac_drv->get_pause_enable = hns_gmac_get_pausefrm_cfg;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 4a448138b4ec..f4cf569a2599 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -1202,7 +1202,7 @@ void hns_mac_get_regs(struct hns_mac_cb *mac_cb, void *data)
 
 void hns_set_led_opt(struct hns_mac_cb *mac_cb)
 {
-	int nic_data = 0;
+	int nic_data;
 	int txpkts, rxpkts;
 
 	txpkts = mac_cb->txpkt_for_led - mac_cb->hw_stats.tx_good_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 3278bf471ddf..8943ffab4418 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -364,12 +364,8 @@ struct mac_driver {
 	void (*config_max_frame_length)(void *mac_drv, u16 newval);
 	/*config PAD and CRC enable */
 	void (*config_pad_and_crc)(void *mac_drv, u8 newval);
-	/* config duplex mode*/
-	void (*config_half_duplex)(void *mac_drv, u8 newval);
 	/*config tx pause time,if pause_time is zero,disable tx pause enable*/
 	void (*set_tx_auto_pause_frames)(void *mac_drv, u16 pause_time);
-	/*config rx pause enable*/
-	void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable);
 	/* config rx mode for promiscuous*/
 	void (*set_promiscuous)(void *mac_drv, u8 enable);
 	void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 87d3db4666df..c2a60612f503 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1613,7 +1613,7 @@ int hns_dsaf_set_mac_uc_entry(
 	struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_tbl_tcam_ucast_cfg mac_data;
 	struct dsaf_drv_priv *priv =
@@ -1679,7 +1679,7 @@ int hns_dsaf_rm_mac_addr(
 	struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_tbl_tcam_ucast_cfg mac_data;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 
@@ -1751,7 +1751,7 @@ static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
 int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 			     struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_tbl_tcam_key mask_key;
 	struct dsaf_tbl_tcam_data *pmask_key = NULL;
@@ -1861,7 +1861,7 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
 			   u8 in_port_num, u8 *addr)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_priv *priv =
 	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
@@ -1910,7 +1910,7 @@ int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
 int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 			     struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
@@ -2264,7 +2264,7 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num)
  */
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data)
 {
-	u32 i = 0;
+	u32 i;
 	u32 j;
 	u32 *p = data;
 	u32 reg_tmp;
@@ -2768,7 +2768,7 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
 	struct dsaf_drv_mac_single_dest_entry mask_entry;
 	struct dsaf_drv_tbl_tcam_key temp_key, mask_key;
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct hns_mac_cb *mac_cb;
 	u8 addr[ETH_ALEN] = {0};
@@ -2870,7 +2870,7 @@ static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port)
 	struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0};
 	struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0};
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	u8 addr[ETH_ALEN] = {0};
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 173d6966c1a3..325e81d30cfd 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -686,7 +686,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
 	obj_args[0].integer.type = ACPI_TYPE_INTEGER;
 	obj_args[0].integer.value = mac_cb->mac_id;
 	obj_args[1].integer.type = ACPI_TYPE_INTEGER;
-	obj_args[1].integer.value = !!en;
+	obj_args[1].integer.value = en;
 
 	argv4.type = ACPI_TYPE_PACKAGE;
 	argv4.package.count = 2;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index d0f8b1fff333..ff03cafccb66 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -462,33 +462,22 @@ int hns_ppe_get_regs_count(void)
  */
 void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data)
 {
-	char *buff = (char *)data;
 	int index = ppe_cb->index;
-
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_sw_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_ok", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_drop_pkt_no_bd", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_fail", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_wait", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_drop_no_buf", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_err_fifo_full", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_bd", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_ok", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_fifo_empty", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_csum_fail", index);
+	u8 *buff = data;
+
+	ethtool_sprintf(&buff, "ppe%d_rx_sw_pkt", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_ok", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_drop_pkt_no_bd", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_fail", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_wait", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_drop_no_buf", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_err_fifo_full", index);
+
+	ethtool_sprintf(&buff, "ppe%d_tx_bd", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_ok", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_fifo_empty", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_csum_fail", index);
 }
 
 void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index b6c8910cf7ba..5d5dc6942232 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -929,69 +929,42 @@ int hns_rcb_get_ring_regs_count(void)
  */
 void hns_rcb_get_strings(int stringset, u8 *data, int index)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_rcb_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_tx_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_drop_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_fbd_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_bytes", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_err_cnt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_io_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_sw_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_seg_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_restart_queue", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_tx_busy", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_rcb_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_drop_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_fbd_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bytes", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_err_cnt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_io_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_sw_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_seg_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_reuse_pg", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_len_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_non_vld_desc_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bd_num_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l2_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l3l4csum_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_rcb_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_ppe_tx_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_ppe_drop_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_fbd_num", index);
+
+	ethtool_sprintf(&buff, "tx_ring%d_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_bytes", index);
+	ethtool_sprintf(&buff, "tx_ring%d_err_cnt", index);
+	ethtool_sprintf(&buff, "tx_ring%d_io_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_sw_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_seg_pkt", index);
+	ethtool_sprintf(&buff, "tx_ring%d_restart_queue", index);
+	ethtool_sprintf(&buff, "tx_ring%d_tx_busy", index);
+
+	ethtool_sprintf(&buff, "rx_ring%d_rcb_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_ppe_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_ppe_drop_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_fbd_num", index);
+
+	ethtool_sprintf(&buff, "rx_ring%d_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_bytes", index);
+	ethtool_sprintf(&buff, "rx_ring%d_err_cnt", index);
+	ethtool_sprintf(&buff, "rx_ring%d_io_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_sw_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_seg_pkt", index);
+	ethtool_sprintf(&buff, "rx_ring%d_reuse_pg", index);
+	ethtool_sprintf(&buff, "rx_ring%d_len_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_non_vld_desc_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_bd_num_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_l2_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_l3l4csum_err", index);
 }
 
 void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data)
@@ -1001,7 +974,7 @@ void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data)
 	bool is_dbg = HNS_DSAF_IS_DEBUG(rcb_com->dsaf_dev);
 	u32 reg_tmp;
 	u32 reg_num_tmp;
-	u32 i = 0;
+	u32 i;
 
 	/*rcb common registers */
 	regs[0] = dsaf_read_dev(rcb_com, RCB_COM_CFG_ENDIAN_REG);
@@ -1072,7 +1045,7 @@ void hns_rcb_get_ring_regs(struct hnae_queue *queue, void *data)
 	u32 *regs = data;
 	struct ring_pair_cb *ring_pair
 		= container_of(queue, struct ring_pair_cb, q);
-	u32 i = 0;
+	u32 i;
 
 	/*rcb ring registers */
 	regs[0] = dsaf_read_dev(queue, RCB_RING_RX_RING_BASEADDR_L_REG);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 7e3609ce112a..be52acd448f9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -267,19 +267,6 @@ static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, char *mac_addr)
 }
 
 /**
- *hns_xgmac_set_rx_ignore_pause_frames - set rx pause param about xgmac
- *@mac_drv: mac driver
- *@enable:enable rx pause param
- */
-static void hns_xgmac_set_rx_ignore_pause_frames(void *mac_drv, u32 enable)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, XGMAC_MAC_PAUSE_CTRL_REG,
-			 XGMAC_PAUSE_CTL_RX_B, !!enable);
-}
-
-/**
  *hns_xgmac_set_tx_auto_pause_frames - set tx pause param about xgmac
  *@mac_drv: mac driver
  *@enable:enable tx pause param
@@ -495,7 +482,7 @@ static void hns_xgmac_get_link_status(void *mac_drv, u32 *link_stat)
  */
 static void hns_xgmac_get_regs(void *mac_drv, void *data)
 {
-	u32 i = 0;
+	u32 i;
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 	u32 *regs = data;
 	u64 qtmp;
@@ -758,16 +745,14 @@ static void hns_xgmac_get_stats(void *mac_drv, u64 *data)
  */
 static void hns_xgmac_get_strings(u32 stringset, u8 *data)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 	u32 i;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++) {
-		snprintf(buff, ETH_GSTRING_LEN, g_xgmac_stats_string[i].desc);
-		buff = buff + ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++)
+		ethtool_sprintf(&buff, g_xgmac_stats_string[i].desc);
 }
 
 /**
@@ -814,9 +799,6 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->set_an_mode = NULL;
 	mac_drv->config_loopback = NULL;
 	mac_drv->config_pad_and_crc = hns_xgmac_config_pad_and_crc;
-	mac_drv->config_half_duplex = NULL;
-	mac_drv->set_rx_ignore_pause_frames =
-		hns_xgmac_set_rx_ignore_pause_frames;
 	mac_drv->mac_free = hns_xgmac_free;
 	mac_drv->adjust_link = NULL;
 	mac_drv->set_tx_auto_pause_frames = hns_xgmac_set_tx_auto_pause_frames;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index c66a7a51198e..7a1a0a90a1a3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -872,7 +872,7 @@ out:
 static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
 	struct hnae_ring *ring = ring_data->ring;
-	int num = 0;
+	int num;
 	bool rx_stopped;
 
 	hns_update_rx_rate(ring);
@@ -1881,7 +1881,7 @@ static void hns_nic_set_rx_mode(struct net_device *ndev)
 static void hns_nic_get_stats64(struct net_device *ndev,
 				struct rtnl_link_stats64 *stats)
 {
-	int idx = 0;
+	int idx;
 	u64 tx_bytes = 0;
 	u64 rx_bytes = 0;
 	u64 tx_pkts = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a6e3f07caf99..da48c05435ea 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -17,7 +17,6 @@
 #define HNS_PHY_CSC_REG		16	/* Copper Specific Control Register */
 #define HNS_PHY_CSS_REG		17	/* Copper Specific Status Register */
 #define HNS_LED_FC_REG		16	/* LED Function Control Reg. */
-#define HNS_LED_PC_REG		17	/* LED Polarity Control Reg. */
 
 #define HNS_LED_FORCE_ON	9
 #define HNS_LED_FORCE_OFF	8
@@ -480,7 +479,7 @@ static int __lb_run_test(struct net_device *ndev,
 #define NIC_LB_TEST_NO_MEM_ERR 1
 #define NIC_LB_TEST_TX_CNT_ERR 2
 #define NIC_LB_TEST_RX_CNT_ERR 3
-#define NIC_LB_TEST_RX_PKG_ERR 4
+
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct hnae_handle *h = priv->ae_handle;
 	int i, j, lc, good_cnt, ret_val = 0;
@@ -895,7 +894,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_handle *h = priv->ae_handle;
-	char *buff = (char *)data;
+	u8 *buff = data;
 
 	if (!h->dev->ops->get_strings) {
 		netdev_err(netdev, "h->dev->ops->get_strings is null!\n");
@@ -903,74 +902,45 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	}
 
 	if (stringset == ETH_SS_TEST) {
-		if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII) {
-			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_MAC],
-			       ETH_GSTRING_LEN);
-			buff += ETH_GSTRING_LEN;
-		}
-		memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES],
-		       ETH_GSTRING_LEN);
-		buff += ETH_GSTRING_LEN;
+		if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII)
+			ethtool_sprintf(&buff,
+					hns_nic_test_strs[MAC_INTERNALLOOP_MAC]);
+		ethtool_sprintf(&buff,
+				hns_nic_test_strs[MAC_INTERNALLOOP_SERDES]);
 		if ((netdev->phydev) && (!netdev->phydev->is_c45))
-			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY],
-			       ETH_GSTRING_LEN);
+			ethtool_sprintf(&buff,
+					hns_nic_test_strs[MAC_INTERNALLOOP_PHY]);
 
 	} else {
-		snprintf(buff, ETH_GSTRING_LEN, "rx_packets");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_packets");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_bytes");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_bytes");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "multicast");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "collisions");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_over_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_crc_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_frame_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_fifo_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_missed_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_aborted_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_carrier_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_fifo_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_heartbeat_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_length_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_window_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_compressed");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_compressed");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_rx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_timeout");
-		buff = buff + ETH_GSTRING_LEN;
-
-		h->dev->ops->get_strings(h, stringset, (u8 *)buff);
+		ethtool_sprintf(&buff, "rx_packets");
+		ethtool_sprintf(&buff, "tx_packets");
+		ethtool_sprintf(&buff, "rx_bytes");
+		ethtool_sprintf(&buff, "tx_bytes");
+		ethtool_sprintf(&buff, "rx_errors");
+		ethtool_sprintf(&buff, "tx_errors");
+		ethtool_sprintf(&buff, "rx_dropped");
+		ethtool_sprintf(&buff, "tx_dropped");
+		ethtool_sprintf(&buff, "multicast");
+		ethtool_sprintf(&buff, "collisions");
+		ethtool_sprintf(&buff, "rx_over_errors");
+		ethtool_sprintf(&buff, "rx_crc_errors");
+		ethtool_sprintf(&buff, "rx_frame_errors");
+		ethtool_sprintf(&buff, "rx_fifo_errors");
+		ethtool_sprintf(&buff, "rx_missed_errors");
+		ethtool_sprintf(&buff, "tx_aborted_errors");
+		ethtool_sprintf(&buff, "tx_carrier_errors");
+		ethtool_sprintf(&buff, "tx_fifo_errors");
+		ethtool_sprintf(&buff, "tx_heartbeat_errors");
+		ethtool_sprintf(&buff, "rx_length_errors");
+		ethtool_sprintf(&buff, "tx_window_errors");
+		ethtool_sprintf(&buff, "rx_compressed");
+		ethtool_sprintf(&buff, "tx_compressed");
+		ethtool_sprintf(&buff, "netdev_rx_dropped");
+		ethtool_sprintf(&buff, "netdev_tx_dropped");
+
+		ethtool_sprintf(&buff, "netdev_tx_timeout");
+
+		h->dev->ops->get_strings(h, stringset, buff);
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e9e60a935f40..01d6bfc0917c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -90,6 +90,7 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_HW_PAD_B,
 	HNAE3_DEV_SUPPORT_STASH_B,
 	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
+	HNAE3_DEV_SUPPORT_PAUSE_B,
 };
 
 #define hnae3_dev_fd_supported(hdev) \
@@ -134,6 +135,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_dev_stash_supported(hdev) \
 	test_bit(HNAE3_DEV_SUPPORT_STASH_B, (hdev)->ae_dev->caps)
 
+#define hnae3_dev_pause_supported(hdev) \
+	test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, (hdev)->ae_dev->caps)
+
 #define hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev) \
 	test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (ae_dev)->caps)
 
@@ -608,8 +612,6 @@ struct hnae3_ae_ops {
 			    struct ethtool_rxnfc *cmd);
 	int (*del_fd_entry)(struct hnae3_handle *handle,
 			    struct ethtool_rxnfc *cmd);
-	void (*del_all_fd_entries)(struct hnae3_handle *handle,
-				   bool clear_list);
 	int (*get_fd_rule_cnt)(struct hnae3_handle *handle,
 			       struct ethtool_rxnfc *cmd);
 	int (*get_fd_rule_info)(struct hnae3_handle *handle,
@@ -649,6 +651,10 @@ struct hnae3_ae_ops {
 	int (*del_cls_flower)(struct hnae3_handle *handle,
 			      struct flow_cls_offload *cls_flower);
 	bool (*cls_flower_active)(struct hnae3_handle *handle);
+	int (*get_phy_link_ksettings)(struct hnae3_handle *handle,
+				      struct ethtool_link_ksettings *cmd);
+	int (*set_phy_link_ksettings)(struct hnae3_handle *handle,
+				      const struct ethtool_link_ksettings *cmd);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index dd11c57027bb..9d702bd0c7c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -362,6 +362,11 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "support UDP tunnel csum: %s\n",
 		 test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, caps) ?
 		 "yes" : "no");
+	dev_info(&h->pdev->dev, "support PAUSE: %s\n",
+		 test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps) ?
+		 "yes" : "no");
+	dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n",
+		 test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no");
 }
 
 static void hns3_dbg_dev_specs(struct hnae3_handle *h)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index bf4302a5cf95..44b775efd5b9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4143,14 +4143,6 @@ static void hns3_uninit_phy(struct net_device *netdev)
 		h->ae_algo->ops->mac_disconnect_phy(h);
 }
 
-static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list)
-{
-	struct hnae3_handle *h = hns3_get_handle(netdev);
-
-	if (h->ae_algo->ops->del_all_fd_entries)
-		h->ae_algo->ops->del_all_fd_entries(h, clear_list);
-}
-
 static int hns3_client_start(struct hnae3_handle *handle)
 {
 	if (!handle->ae_algo->ops->client_start)
@@ -4337,8 +4329,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 
 	hns3_nic_uninit_irq(priv);
 
-	hns3_del_all_fd_rules(netdev, true);
-
 	hns3_clear_all_ring(handle, true);
 
 	hns3_nic_uninit_vector_data(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index adcec4ea7cb9..a1d69c56d119 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -642,6 +642,10 @@ static void hns3_get_pauseparam(struct net_device *netdev,
 				struct ethtool_pauseparam *param)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps))
+		return;
 
 	if (h->ae_algo->ops->get_pauseparam)
 		h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
@@ -652,6 +656,10 @@ static int hns3_set_pauseparam(struct net_device *netdev,
 			       struct ethtool_pauseparam *param)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps))
+		return -EOPNOTSUPP;
 
 	netif_dbg(h, drv, netdev,
 		  "set pauseparam: autoneg=%u, rx:%u, tx:%u\n",
@@ -692,6 +700,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
 	const struct hnae3_ae_ops *ops;
 	u8 module_type;
 	u8 media_type;
@@ -722,7 +731,10 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 		break;
 	case HNAE3_MEDIA_TYPE_COPPER:
 		cmd->base.port = PORT_TP;
-		if (!netdev->phydev)
+		if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+		    ops->get_phy_link_ksettings)
+			ops->get_phy_link_ksettings(h, cmd);
+		else if (!netdev->phydev)
 			hns3_get_ksettings(h, cmd);
 		else
 			phy_ethtool_ksettings_get(netdev->phydev, cmd);
@@ -815,6 +827,9 @@ static int hns3_set_link_ksettings(struct net_device *netdev,
 			return -EINVAL;
 
 		return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+	} else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+		   ops->set_phy_link_ksettings) {
+		return ops->set_phy_link_ksettings(handle, cmd);
 	}
 
 	if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 1bd0ddfaec4d..3284a2cb52e6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -353,7 +353,10 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
 
 	set_bit(HNAE3_DEV_SUPPORT_FD_B, ae_dev->caps);
 	set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps);
-	set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+	if (hdev->ae_dev->dev_version == HNAE3_DEVICE_VERSION_V2) {
+		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
+	}
 }
 
 static void hclge_parse_capability(struct hclge_dev *hdev,
@@ -378,6 +381,12 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_FD_FORWARD_TC_B))
 		set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B))
+		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B))
+		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
+		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
@@ -467,6 +476,8 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
 
 	hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
 	hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
+	if (hnae3_dev_phy_imp_supported(hdev))
+		hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
 	req->compat = cpu_to_le32(compat);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 057dda735492..565c5aa54f88 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -127,7 +127,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_QUERY_MAC_TNL_INT	= 0x0310,
 	HCLGE_OPC_MAC_TNL_INT_EN	= 0x0311,
 	HCLGE_OPC_CLEAR_MAC_TNL_INT	= 0x0312,
-	HCLGE_OPC_SERDES_LOOPBACK       = 0x0315,
+	HCLGE_OPC_COMMON_LOOPBACK       = 0x0315,
 	HCLGE_OPC_CONFIG_FEC_MODE	= 0x031A,
 
 	/* PFC/Pause commands */
@@ -243,6 +243,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_FD_KEY_CONFIG		= 0x1202,
 	HCLGE_OPC_FD_TCAM_OP		= 0x1203,
 	HCLGE_OPC_FD_AD_OP		= 0x1204,
+	HCLGE_OPC_FD_USER_DEF_OP	= 0x1207,
 
 	/* MDIO command */
 	HCLGE_OPC_MDIO_CONFIG		= 0x1900,
@@ -303,6 +304,10 @@ enum hclge_opcode_type {
 	HCLGE_PPP_CMD1_INT_CMD		= 0x2101,
 	HCLGE_MAC_ETHERTYPE_IDX_RD      = 0x2105,
 	HCLGE_NCSI_INT_EN		= 0x2401,
+
+	/* PHY command */
+	HCLGE_OPC_PHY_LINK_KSETTING	= 0x7025,
+	HCLGE_OPC_PHY_REG		= 0x7026,
 };
 
 #define HCLGE_TQP_REG_OFFSET		0x80000
@@ -384,6 +389,8 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_HW_PAD_B,
 	HCLGE_CAP_STASH_B,
 	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
+	HCLGE_CAP_FEC_B = 13,
+	HCLGE_CAP_PAUSE_B = 14,
 };
 
 enum HCLGE_API_CAP_BITS {
@@ -958,9 +965,10 @@ struct hclge_pf_rst_done_cmd {
 
 #define HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B	BIT(0)
 #define HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B	BIT(2)
-#define HCLGE_CMD_SERDES_DONE_B			BIT(0)
-#define HCLGE_CMD_SERDES_SUCCESS_B		BIT(1)
-struct hclge_serdes_lb_cmd {
+#define HCLGE_CMD_GE_PHY_INNER_LOOP_B		BIT(3)
+#define HCLGE_CMD_COMMON_LB_DONE_B		BIT(0)
+#define HCLGE_CMD_COMMON_LB_SUCCESS_B		BIT(1)
+struct hclge_common_lb_cmd {
 	u8 mask;
 	u8 enable;
 	u8 result;
@@ -1075,6 +1083,19 @@ struct hclge_fd_ad_config_cmd {
 	u8 rsv2[8];
 };
 
+#define HCLGE_FD_USER_DEF_OFT_S		0
+#define HCLGE_FD_USER_DEF_OFT_M		GENMASK(14, 0)
+#define HCLGE_FD_USER_DEF_EN_B		15
+struct hclge_fd_user_def_cfg_cmd {
+	__le16 ol2_cfg;
+	__le16 l2_cfg;
+	__le16 ol3_cfg;
+	__le16 l3_cfg;
+	__le16 ol4_cfg;
+	__le16 l4_cfg;
+	u8 rsv[12];
+};
+
 struct hclge_get_m7_bd_cmd {
 	__le32 bd_num;
 	u8 rsv[20];
@@ -1096,6 +1117,7 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd {
 
 #define HCLGE_LINK_EVENT_REPORT_EN_B	0
 #define HCLGE_NCSI_ERROR_REPORT_EN_B	1
+#define HCLGE_PHY_IMP_EN_B		2
 struct hclge_firmware_compat_cmd {
 	__le32 compat;
 	u8 rsv[20];
@@ -1137,6 +1159,36 @@ struct hclge_dev_specs_1_cmd {
 	u8 rsv1[18];
 };
 
+#define HCLGE_PHY_LINK_SETTING_BD_NUM		2
+
+struct hclge_phy_link_ksetting_0_cmd {
+	__le32 speed;
+	u8 duplex;
+	u8 autoneg;
+	u8 eth_tp_mdix;
+	u8 eth_tp_mdix_ctrl;
+	u8 port;
+	u8 transceiver;
+	u8 phy_address;
+	u8 rsv;
+	__le32 supported;
+	__le32 advertising;
+	__le32 lp_advertising;
+};
+
+struct hclge_phy_link_ksetting_1_cmd {
+	u8 master_slave_cfg;
+	u8 master_slave_state;
+	u8 rsv[22];
+};
+
+struct hclge_phy_reg_cmd {
+	__le16 reg_addr;
+	u8 rsv0[2];
+	__le16 reg_val;
+	u8 rsv1[18];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 6b1d197df881..1c699131e8df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1546,13 +1546,13 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev,
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	struct hclge_config_mac_mode_cmd *req_app;
-	struct hclge_serdes_lb_cmd *req_serdes;
+	struct hclge_common_lb_cmd *req_common;
 	struct hclge_desc desc;
 	u8 loopback_en;
 	int ret;
 
 	req_app = (struct hclge_config_mac_mode_cmd *)desc.data;
-	req_serdes = (struct hclge_serdes_lb_cmd *)desc.data;
+	req_common = (struct hclge_common_lb_cmd *)desc.data;
 
 	dev_info(&hdev->pdev->dev, "mac id: %u\n", hdev->hw.mac.mac_id);
 
@@ -1569,27 +1569,33 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev,
 	dev_info(&hdev->pdev->dev, "app loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, true);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"failed to dump serdes loopback status, ret = %d\n",
+			"failed to dump common loopback status, ret = %d\n",
 			ret);
 		return;
 	}
 
-	loopback_en = req_serdes->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
+	loopback_en = req_common->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
 	dev_info(&hdev->pdev->dev, "serdes serial loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	loopback_en = req_serdes->enable &
+	loopback_en = req_common->enable &
 			HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
 	dev_info(&hdev->pdev->dev, "serdes parallel loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	if (phydev)
+	if (phydev) {
 		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
 			 phydev->loopback_enabled ? "on" : "off");
+	} else if (hnae3_dev_phy_imp_supported(hdev)) {
+		loopback_en = req_common->enable &
+			      HCLGE_CMD_GE_PHY_INNER_LOOP_B;
+		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
+			 loopback_en ? "on" : "off");
+	}
 }
 
 /* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e3f81c7e0ce7..058317ce579c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -62,7 +62,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
 static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
 static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
-static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
+static int hclge_clear_arfs_rules(struct hclge_dev *hdev);
 static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 						   unsigned long *addr);
 static int hclge_set_default_loopback(struct hclge_dev *hdev);
@@ -70,6 +70,7 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev);
 static void hclge_sync_mac_table(struct hclge_dev *hdev);
 static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+static void hclge_sync_fd_table(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -384,36 +385,62 @@ static const struct key_info meta_data_key_info[] = {
 };
 
 static const struct key_info tuple_key_info[] = {
-	{ OUTER_DST_MAC, 48},
-	{ OUTER_SRC_MAC, 48},
-	{ OUTER_VLAN_TAG_FST, 16},
-	{ OUTER_VLAN_TAG_SEC, 16},
-	{ OUTER_ETH_TYPE, 16},
-	{ OUTER_L2_RSV, 16},
-	{ OUTER_IP_TOS, 8},
-	{ OUTER_IP_PROTO, 8},
-	{ OUTER_SRC_IP, 32},
-	{ OUTER_DST_IP, 32},
-	{ OUTER_L3_RSV, 16},
-	{ OUTER_SRC_PORT, 16},
-	{ OUTER_DST_PORT, 16},
-	{ OUTER_L4_RSV, 32},
-	{ OUTER_TUN_VNI, 24},
-	{ OUTER_TUN_FLOW_ID, 8},
-	{ INNER_DST_MAC, 48},
-	{ INNER_SRC_MAC, 48},
-	{ INNER_VLAN_TAG_FST, 16},
-	{ INNER_VLAN_TAG_SEC, 16},
-	{ INNER_ETH_TYPE, 16},
-	{ INNER_L2_RSV, 16},
-	{ INNER_IP_TOS, 8},
-	{ INNER_IP_PROTO, 8},
-	{ INNER_SRC_IP, 32},
-	{ INNER_DST_IP, 32},
-	{ INNER_L3_RSV, 16},
-	{ INNER_SRC_PORT, 16},
-	{ INNER_DST_PORT, 16},
-	{ INNER_L4_RSV, 32},
+	{ OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 },
+	{ OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 },
+	{ OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 },
+	{ OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 },
+	{ OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 },
+	{ OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 },
+	{ OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 },
+	{ OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 },
+	{ OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 },
+	{ INNER_DST_MAC, 48, KEY_OPT_MAC,
+	  offsetof(struct hclge_fd_rule, tuples.dst_mac),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) },
+	{ INNER_SRC_MAC, 48, KEY_OPT_MAC,
+	  offsetof(struct hclge_fd_rule, tuples.src_mac),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_mac) },
+	{ INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.vlan_tag1),
+	  offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) },
+	{ INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_ETH_TYPE, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.ether_proto),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) },
+	{ INNER_L2_RSV, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.l2_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) },
+	{ INNER_IP_TOS, 8, KEY_OPT_U8,
+	  offsetof(struct hclge_fd_rule, tuples.ip_tos),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) },
+	{ INNER_IP_PROTO, 8, KEY_OPT_U8,
+	  offsetof(struct hclge_fd_rule, tuples.ip_proto),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) },
+	{ INNER_SRC_IP, 32, KEY_OPT_IP,
+	  offsetof(struct hclge_fd_rule, tuples.src_ip),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_ip) },
+	{ INNER_DST_IP, 32, KEY_OPT_IP,
+	  offsetof(struct hclge_fd_rule, tuples.dst_ip),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) },
+	{ INNER_L3_RSV, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.l3_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) },
+	{ INNER_SRC_PORT, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.src_port),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_port) },
+	{ INNER_DST_PORT, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.dst_port),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_port) },
+	{ INNER_L4_RSV, 32, KEY_OPT_LE32,
+	  offsetof(struct hclge_fd_rule, tuples.l4_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) },
 };
 
 static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
@@ -751,8 +778,9 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 		handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
 		handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
 
-		if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
-		    hdev->hw.mac.phydev->drv->set_loopback) {
+		if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
+		     hdev->hw.mac.phydev->drv->set_loopback) ||
+		    hnae3_dev_phy_imp_supported(hdev)) {
 			count += 1;
 			handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
 		}
@@ -1150,8 +1178,10 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
 	if (hnae3_dev_fec_supported(hdev))
 		hclge_convert_setting_fec(mac);
 
+	if (hnae3_dev_pause_supported(hdev))
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
 }
 
@@ -1163,8 +1193,11 @@ static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev,
 	hclge_convert_setting_kr(mac, speed_ability);
 	if (hnae3_dev_fec_supported(hdev))
 		hclge_convert_setting_fec(mac);
+
+	if (hnae3_dev_pause_supported(hdev))
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
 }
 
@@ -1193,10 +1226,13 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev,
 		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
 	}
 
+	if (hnae3_dev_pause_supported(hdev)) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+	}
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
 }
 
 static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability)
@@ -2889,10 +2925,12 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 	clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state);
 }
 
-static void hclge_update_port_capability(struct hclge_mac *mac)
+static void hclge_update_port_capability(struct hclge_dev *hdev,
+					 struct hclge_mac *mac)
 {
-	/* update fec ability by speed */
-	hclge_convert_setting_fec(mac);
+	if (hnae3_dev_fec_supported(hdev))
+		/* update fec ability by speed */
+		hclge_convert_setting_fec(mac);
 
 	/* firmware can not identify back plane type, the media type
 	 * read from configuration can help deal it
@@ -2984,6 +3022,141 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
 	return 0;
 }
 
+static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle,
+					struct ethtool_link_ksettings *cmd)
+{
+	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_phy_link_ksetting_0_cmd *req0;
+	struct hclge_phy_link_ksetting_1_cmd *req1;
+	u32 supported, advertising, lp_advertising;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
+				   true);
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get phy link ksetting, ret = %d.\n", ret);
+		return ret;
+	}
+
+	req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
+	cmd->base.autoneg = req0->autoneg;
+	cmd->base.speed = le32_to_cpu(req0->speed);
+	cmd->base.duplex = req0->duplex;
+	cmd->base.port = req0->port;
+	cmd->base.transceiver = req0->transceiver;
+	cmd->base.phy_address = req0->phy_address;
+	cmd->base.eth_tp_mdix = req0->eth_tp_mdix;
+	cmd->base.eth_tp_mdix_ctrl = req0->eth_tp_mdix_ctrl;
+	supported = le32_to_cpu(req0->supported);
+	advertising = le32_to_cpu(req0->advertising);
+	lp_advertising = le32_to_cpu(req0->lp_advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+						lp_advertising);
+
+	req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
+	cmd->base.master_slave_cfg = req1->master_slave_cfg;
+	cmd->base.master_slave_state = req1->master_slave_state;
+
+	return 0;
+}
+
+static int
+hclge_set_phy_link_ksettings(struct hnae3_handle *handle,
+			     const struct ethtool_link_ksettings *cmd)
+{
+	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_phy_link_ksetting_0_cmd *req0;
+	struct hclge_phy_link_ksetting_1_cmd *req1;
+	struct hclge_dev *hdev = vport->back;
+	u32 advertising;
+	int ret;
+
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	    ((cmd->base.speed != SPEED_100 && cmd->base.speed != SPEED_10) ||
+	     (cmd->base.duplex != DUPLEX_HALF &&
+	      cmd->base.duplex != DUPLEX_FULL)))
+		return -EINVAL;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
+				   false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
+				   false);
+
+	req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
+	req0->autoneg = cmd->base.autoneg;
+	req0->speed = cpu_to_le32(cmd->base.speed);
+	req0->duplex = cmd->base.duplex;
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+	req0->advertising = cpu_to_le32(advertising);
+	req0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
+
+	req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
+	req1->master_slave_cfg = cmd->base.master_slave_cfg;
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to set phy link ksettings, ret = %d.\n", ret);
+		return ret;
+	}
+
+	hdev->hw.mac.autoneg = cmd->base.autoneg;
+	hdev->hw.mac.speed = cmd->base.speed;
+	hdev->hw.mac.duplex = cmd->base.duplex;
+	linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising);
+
+	return 0;
+}
+
+static int hclge_update_tp_port_info(struct hclge_dev *hdev)
+{
+	struct ethtool_link_ksettings cmd;
+	int ret;
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return 0;
+
+	ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &cmd);
+	if (ret)
+		return ret;
+
+	hdev->hw.mac.autoneg = cmd.base.autoneg;
+	hdev->hw.mac.speed = cmd.base.speed;
+	hdev->hw.mac.duplex = cmd.base.duplex;
+
+	return 0;
+}
+
+static int hclge_tp_port_init(struct hclge_dev *hdev)
+{
+	struct ethtool_link_ksettings cmd;
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return 0;
+
+	cmd.base.autoneg = hdev->hw.mac.autoneg;
+	cmd.base.speed = hdev->hw.mac.speed;
+	cmd.base.duplex = hdev->hw.mac.duplex;
+	linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising);
+
+	return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd);
+}
+
 static int hclge_update_port_info(struct hclge_dev *hdev)
 {
 	struct hclge_mac *mac = &hdev->hw.mac;
@@ -2992,7 +3165,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 
 	/* get the port info from SFP cmd if not copper port */
 	if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
-		return 0;
+		return hclge_update_tp_port_info(hdev);
 
 	/* if IMP does not support get SFP/qSFP info, return directly */
 	if (!hdev->support_sfp_query)
@@ -3012,7 +3185,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
 		if (mac->speed_type == QUERY_ACTIVE_SPEED) {
-			hclge_update_port_capability(mac);
+			hclge_update_port_capability(hdev, mac);
 			return 0;
 		}
 		return hclge_cfg_mac_speed_dup(hdev, mac->speed,
@@ -4095,6 +4268,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev)
 	hclge_update_link_status(hdev);
 	hclge_sync_mac_table(hdev);
 	hclge_sync_promisc_mode(hdev);
+	hclge_sync_fd_table(hdev);
 
 	if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
 		delta = jiffies - hdev->last_serv_processed;
@@ -4996,6 +5170,285 @@ static void hclge_request_update_promisc_mode(struct hnae3_handle *handle)
 	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
 }
 
+static void hclge_sync_fd_state(struct hclge_dev *hdev)
+{
+	if (hlist_empty(&hdev->fd_rule_list))
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+}
+
+static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location)
+{
+	if (!test_bit(location, hdev->fd_bmap)) {
+		set_bit(location, hdev->fd_bmap);
+		hdev->hclge_fd_rule_num++;
+	}
+}
+
+static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location)
+{
+	if (test_bit(location, hdev->fd_bmap)) {
+		clear_bit(location, hdev->fd_bmap);
+		hdev->hclge_fd_rule_num--;
+	}
+}
+
+static void hclge_fd_free_node(struct hclge_dev *hdev,
+			       struct hclge_fd_rule *rule)
+{
+	hlist_del(&rule->rule_node);
+	kfree(rule);
+	hclge_sync_fd_state(hdev);
+}
+
+static void hclge_update_fd_rule_node(struct hclge_dev *hdev,
+				      struct hclge_fd_rule *old_rule,
+				      struct hclge_fd_rule *new_rule,
+				      enum HCLGE_FD_NODE_STATE state)
+{
+	switch (state) {
+	case HCLGE_FD_TO_ADD:
+	case HCLGE_FD_ACTIVE:
+		/* 1) if the new state is TO_ADD, just replace the old rule
+		 * with the same location, no matter its state, because the
+		 * new rule will be configured to the hardware.
+		 * 2) if the new state is ACTIVE, it means the new rule
+		 * has been configured to the hardware, so just replace
+		 * the old rule node with the same location.
+		 * 3) for it doesn't add a new node to the list, so it's
+		 * unnecessary to update the rule number and fd_bmap.
+		 */
+		new_rule->rule_node.next = old_rule->rule_node.next;
+		new_rule->rule_node.pprev = old_rule->rule_node.pprev;
+		memcpy(old_rule, new_rule, sizeof(*old_rule));
+		kfree(new_rule);
+		break;
+	case HCLGE_FD_DELETED:
+		hclge_fd_dec_rule_cnt(hdev, old_rule->location);
+		hclge_fd_free_node(hdev, old_rule);
+		break;
+	case HCLGE_FD_TO_DEL:
+		/* if new request is TO_DEL, and old rule is existent
+		 * 1) the state of old rule is TO_DEL, we need do nothing,
+		 * because we delete rule by location, other rule content
+		 * is unncessary.
+		 * 2) the state of old rule is ACTIVE, we need to change its
+		 * state to TO_DEL, so the rule will be deleted when periodic
+		 * task being scheduled.
+		 * 3) the state of old rule is TO_ADD, it means the rule hasn't
+		 * been added to hardware, so we just delete the rule node from
+		 * fd_rule_list directly.
+		 */
+		if (old_rule->state == HCLGE_FD_TO_ADD) {
+			hclge_fd_dec_rule_cnt(hdev, old_rule->location);
+			hclge_fd_free_node(hdev, old_rule);
+			return;
+		}
+		old_rule->state = HCLGE_FD_TO_DEL;
+		break;
+	}
+}
+
+static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist,
+						u16 location,
+						struct hclge_fd_rule **parent)
+{
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(rule, node, hlist, rule_node) {
+		if (rule->location == location)
+			return rule;
+		else if (rule->location > location)
+			return NULL;
+		/* record the parent node, use to keep the nodes in fd_rule_list
+		 * in ascend order.
+		 */
+		*parent = rule;
+	}
+
+	return NULL;
+}
+
+/* insert fd rule node in ascend order according to rule->location */
+static void hclge_fd_insert_rule_node(struct hlist_head *hlist,
+				      struct hclge_fd_rule *rule,
+				      struct hclge_fd_rule *parent)
+{
+	INIT_HLIST_NODE(&rule->rule_node);
+
+	if (parent)
+		hlist_add_behind(&rule->rule_node, &parent->rule_node);
+	else
+		hlist_add_head(&rule->rule_node, hlist);
+}
+
+static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev,
+				     struct hclge_fd_user_def_cfg *cfg)
+{
+	struct hclge_fd_user_def_cfg_cmd *req;
+	struct hclge_desc desc;
+	u16 data = 0;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false);
+
+	req = (struct hclge_fd_user_def_cfg_cmd *)desc.data;
+
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset);
+	req->ol2_cfg = cpu_to_le16(data);
+
+	data = 0;
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset);
+	req->ol3_cfg = cpu_to_le16(data);
+
+	data = 0;
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset);
+	req->ol4_cfg = cpu_to_le16(data);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to set fd user def data, ret= %d\n", ret);
+	return ret;
+}
+
+static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked)
+{
+	int ret;
+
+	if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state))
+		return;
+
+	if (!locked)
+		spin_lock_bh(&hdev->fd_rule_lock);
+
+	ret = hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg);
+	if (ret)
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+
+	if (!locked)
+		spin_unlock_bh(&hdev->fd_rule_lock);
+}
+
+static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev,
+					  struct hclge_fd_rule *rule)
+{
+	struct hlist_head *hlist = &hdev->fd_rule_list;
+	struct hclge_fd_rule *fd_rule, *parent = NULL;
+	struct hclge_fd_user_def_info *info, *old_info;
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return 0;
+
+	/* for valid layer is start from 1, so need minus 1 to get the cfg */
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	info = &rule->ep.user_def;
+
+	if (!cfg->ref_cnt || cfg->offset == info->offset)
+		return 0;
+
+	if (cfg->ref_cnt > 1)
+		goto error;
+
+	fd_rule = hclge_find_fd_rule(hlist, rule->location, &parent);
+	if (fd_rule) {
+		old_info = &fd_rule->ep.user_def;
+		if (info->layer == old_info->layer)
+			return 0;
+	}
+
+error:
+	dev_err(&hdev->pdev->dev,
+		"No available offset for layer%d fd rule, each layer only support one user def offset.\n",
+		info->layer + 1);
+	return -ENOSPC;
+}
+
+static void hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev,
+					 struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return;
+
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	if (!cfg->ref_cnt) {
+		cfg->offset = rule->ep.user_def.offset;
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+	}
+	cfg->ref_cnt++;
+}
+
+static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev,
+					 struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return;
+
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	if (!cfg->ref_cnt)
+		return;
+
+	cfg->ref_cnt--;
+	if (!cfg->ref_cnt) {
+		cfg->offset = 0;
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+	}
+}
+
+static void hclge_update_fd_list(struct hclge_dev *hdev,
+				 enum HCLGE_FD_NODE_STATE state, u16 location,
+				 struct hclge_fd_rule *new_rule)
+{
+	struct hlist_head *hlist = &hdev->fd_rule_list;
+	struct hclge_fd_rule *fd_rule, *parent = NULL;
+
+	fd_rule = hclge_find_fd_rule(hlist, location, &parent);
+	if (fd_rule) {
+		hclge_fd_dec_user_def_refcnt(hdev, fd_rule);
+		if (state == HCLGE_FD_ACTIVE)
+			hclge_fd_inc_user_def_refcnt(hdev, new_rule);
+		hclge_sync_fd_user_def_cfg(hdev, true);
+
+		hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state);
+		return;
+	}
+
+	/* it's unlikely to fail here, because we have checked the rule
+	 * exist before.
+	 */
+	if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) {
+		dev_warn(&hdev->pdev->dev,
+			 "failed to delete fd rule %u, it's inexistent\n",
+			 location);
+		return;
+	}
+
+	hclge_fd_inc_user_def_refcnt(hdev, new_rule);
+	hclge_sync_fd_user_def_cfg(hdev, true);
+
+	hclge_fd_insert_rule_node(hlist, new_rule, parent);
+	hclge_fd_inc_rule_cnt(hdev, new_rule->location);
+
+	if (state == HCLGE_FD_TO_ADD) {
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+		hclge_task_schedule(hdev, 0);
+	}
+}
+
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
 {
 	struct hclge_get_fd_mode_cmd *req;
@@ -5074,6 +5527,17 @@ static int hclge_set_fd_key_config(struct hclge_dev *hdev,
 	return ret;
 }
 
+static void hclge_fd_disable_user_def(struct hclge_dev *hdev)
+{
+	struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg));
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	hclge_fd_set_user_def_cmd(hdev, cfg);
+}
+
 static int hclge_init_fd_config(struct hclge_dev *hdev)
 {
 #define LOW_2_WORDS		0x03
@@ -5114,9 +5578,12 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
 				BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
 	/* If use max 400bit key, we can support tuples for ether type */
-	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
+	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) {
 		key_cfg->tuple_active |=
 				BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC);
+		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+			key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
+	}
 
 	/* roce_type is used to filter roce frames
 	 * dst_vport is used to specify the rule
@@ -5225,96 +5692,57 @@ static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc,
 static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
 				   struct hclge_fd_rule *rule)
 {
+	int offset, moffset, ip_offset;
+	enum HCLGE_FD_KEY_OPT key_opt;
 	u16 tmp_x_s, tmp_y_s;
 	u32 tmp_x_l, tmp_y_l;
+	u8 *p = (u8 *)rule;
 	int i;
 
-	if (rule->unused_tuple & tuple_bit)
+	if (rule->unused_tuple & BIT(tuple_bit))
 		return true;
 
-	switch (tuple_bit) {
-	case BIT(INNER_DST_MAC):
-		for (i = 0; i < ETH_ALEN; i++) {
-			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
-			       rule->tuples_mask.dst_mac[i]);
-			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
-			       rule->tuples_mask.dst_mac[i]);
-		}
+	key_opt = tuple_key_info[tuple_bit].key_opt;
+	offset = tuple_key_info[tuple_bit].offset;
+	moffset = tuple_key_info[tuple_bit].moffset;
 
-		return true;
-	case BIT(INNER_SRC_MAC):
-		for (i = 0; i < ETH_ALEN; i++) {
-			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples_mask.src_mac[i]);
-			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples_mask.src_mac[i]);
-		}
+	switch (key_opt) {
+	case KEY_OPT_U8:
+		calc_x(*key_x, p[offset], p[moffset]);
+		calc_y(*key_y, p[offset], p[moffset]);
 
 		return true;
-	case BIT(INNER_VLAN_TAG_FST):
-		calc_x(tmp_x_s, rule->tuples.vlan_tag1,
-		       rule->tuples_mask.vlan_tag1);
-		calc_y(tmp_y_s, rule->tuples.vlan_tag1,
-		       rule->tuples_mask.vlan_tag1);
+	case KEY_OPT_LE16:
+		calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
+		calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
 		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
 		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
 
 		return true;
-	case BIT(INNER_ETH_TYPE):
-		calc_x(tmp_x_s, rule->tuples.ether_proto,
-		       rule->tuples_mask.ether_proto);
-		calc_y(tmp_y_s, rule->tuples.ether_proto,
-		       rule->tuples_mask.ether_proto);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
-
-		return true;
-	case BIT(INNER_IP_TOS):
-		calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
-		calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
-
-		return true;
-	case BIT(INNER_IP_PROTO):
-		calc_x(*key_x, rule->tuples.ip_proto,
-		       rule->tuples_mask.ip_proto);
-		calc_y(*key_y, rule->tuples.ip_proto,
-		       rule->tuples_mask.ip_proto);
-
-		return true;
-	case BIT(INNER_SRC_IP):
-		calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX],
-		       rule->tuples_mask.src_ip[IPV4_INDEX]);
-		calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX],
-		       rule->tuples_mask.src_ip[IPV4_INDEX]);
-		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
-		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
-
-		return true;
-	case BIT(INNER_DST_IP):
-		calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX],
-		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
-		calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX],
-		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
+	case KEY_OPT_LE32:
+		calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
+		calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
 		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
 		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
 
 		return true;
-	case BIT(INNER_SRC_PORT):
-		calc_x(tmp_x_s, rule->tuples.src_port,
-		       rule->tuples_mask.src_port);
-		calc_y(tmp_y_s, rule->tuples.src_port,
-		       rule->tuples_mask.src_port);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+	case KEY_OPT_MAC:
+		for (i = 0; i < ETH_ALEN; i++) {
+			calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i],
+			       p[moffset + i]);
+			calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i],
+			       p[moffset + i]);
+		}
 
 		return true;
-	case BIT(INNER_DST_PORT):
-		calc_x(tmp_x_s, rule->tuples.dst_port,
-		       rule->tuples_mask.dst_port);
-		calc_y(tmp_y_s, rule->tuples.dst_port,
-		       rule->tuples_mask.dst_port);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+	case KEY_OPT_IP:
+		ip_offset = IPV4_INDEX * sizeof(u32);
+		calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]),
+		       *(u32 *)(&p[moffset + ip_offset]));
+		calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]),
+		       *(u32 *)(&p[moffset + ip_offset]));
+		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
+		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
 
 		return true;
 	default:
@@ -5402,12 +5830,12 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
 
 	for (i = 0 ; i < MAX_TUPLE; i++) {
 		bool tuple_valid;
-		u32 check_tuple;
 
 		tuple_size = tuple_key_info[i].key_length / 8;
-		check_tuple = key_cfg->tuple_active & BIT(i);
+		if (!(key_cfg->tuple_active & BIT(i)))
+			continue;
 
-		tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x,
+		tuple_valid = hclge_fd_convert_tuple(i, cur_key_x,
 						     cur_key_y, rule);
 		if (tuple_valid) {
 			cur_key_x += tuple_size;
@@ -5538,8 +5966,7 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec,
 	if (!spec || !unused_tuple)
 		return -EINVAL;
 
-	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-		BIT(INNER_IP_TOS);
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
 
 	/* check whether src/dst ip address used */
 	if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@ -5554,8 +5981,8 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec,
 	if (!spec->pdst)
 		*unused_tuple |= BIT(INNER_DST_PORT);
 
-	if (spec->tclass)
-		return -EOPNOTSUPP;
+	if (!spec->tclass)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
 	return 0;
 }
@@ -5567,7 +5994,7 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec,
 		return -EINVAL;
 
 	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-		BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
 	/* check whether src/dst ip address used */
 	if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@ -5579,8 +6006,8 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec,
 	if (!spec->l4_proto)
 		*unused_tuple |= BIT(INNER_IP_PROTO);
 
-	if (spec->tclass)
-		return -EOPNOTSUPP;
+	if (!spec->tclass)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
 	if (spec->l4_4_bytes)
 		return -EOPNOTSUPP;
@@ -5650,9 +6077,98 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 	return 0;
 }
 
+static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple,
+				       struct hclge_fd_user_def_info *info)
+{
+	switch (flow_type) {
+	case ETHER_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L2;
+		*unused_tuple &= ~BIT(INNER_L2_RSV);
+		break;
+	case IP_USER_FLOW:
+	case IPV6_USER_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L3;
+		*unused_tuple &= ~BIT(INNER_L3_RSV);
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L4;
+		*unused_tuple &= ~BIT(INNER_L4_RSV);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs)
+{
+	return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0;
+}
+
+static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev,
+					 struct ethtool_rx_flow_spec *fs,
+					 u32 *unused_tuple,
+					 struct hclge_fd_user_def_info *info)
+{
+	u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active;
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	u16 data, offset, data_mask, offset_mask;
+	int ret;
+
+	info->layer = HCLGE_FD_USER_DEF_NONE;
+	*unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
+
+	if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs))
+		return 0;
+
+	/* user-def data from ethtool is 64 bit value, the bit0~15 is used
+	 * for data, and bit32~47 is used for offset.
+	 */
+	data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
+	data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
+	offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
+	offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
+
+	if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) {
+		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) {
+		dev_err(&hdev->pdev->dev,
+			"user-def offset[%u] should be no more than %u\n",
+			offset, HCLGE_FD_MAX_USER_DEF_OFFSET);
+		return -EINVAL;
+	}
+
+	if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) {
+		dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n");
+		return -EINVAL;
+	}
+
+	ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"unsupported flow type for user-def bytes, ret = %d\n",
+			ret);
+		return ret;
+	}
+
+	info->data = data;
+	info->data_mask = data_mask;
+	info->offset = offset;
+
+	return 0;
+}
+
 static int hclge_fd_check_spec(struct hclge_dev *hdev,
 			       struct ethtool_rx_flow_spec *fs,
-			       u32 *unused_tuple)
+			       u32 *unused_tuple,
+			       struct hclge_fd_user_def_info *info)
 {
 	u32 flow_type;
 	int ret;
@@ -5665,11 +6181,9 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 		return -EINVAL;
 	}
 
-	if ((fs->flow_type & FLOW_EXT) &&
-	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
-		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
-		return -EOPNOTSUPP;
-	}
+	ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info);
+	if (ret)
+		return ret;
 
 	flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 	switch (flow_type) {
@@ -5721,217 +6235,194 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 	return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple);
 }
 
-static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
+static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev,
+				      struct ethtool_rx_flow_spec *fs,
+				      struct hclge_fd_rule *rule, u8 ip_proto)
 {
-	struct hclge_fd_rule *rule = NULL;
-	struct hlist_node *node2;
+	rule->tuples.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
+	rule->tuples_mask.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
 
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
-		if (rule->location >= location)
-			break;
-	}
+	rule->tuples.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
+	rule->tuples_mask.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
 
-	spin_unlock_bh(&hdev->fd_rule_lock);
+	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
+	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
 
-	return  rule && rule->location == location;
-}
+	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
+	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
 
-/* make sure being called after lock up with fd_rule_lock */
-static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
-				     struct hclge_fd_rule *new_rule,
-				     u16 location,
-				     bool is_add)
-{
-	struct hclge_fd_rule *rule = NULL, *parent = NULL;
-	struct hlist_node *node2;
+	rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
 
-	if (is_add && !new_rule)
-		return -EINVAL;
+	rule->tuples.ether_proto = ETH_P_IP;
+	rule->tuples_mask.ether_proto = 0xFFFF;
 
-	hlist_for_each_entry_safe(rule, node2,
-				  &hdev->fd_rule_list, rule_node) {
-		if (rule->location >= location)
-			break;
-		parent = rule;
-	}
-
-	if (rule && rule->location == location) {
-		hlist_del(&rule->rule_node);
-		kfree(rule);
-		hdev->hclge_fd_rule_num--;
-
-		if (!is_add) {
-			if (!hdev->hclge_fd_rule_num)
-				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
-			clear_bit(location, hdev->fd_bmap);
+	rule->tuples.ip_proto = ip_proto;
+	rule->tuples_mask.ip_proto = 0xFF;
+}
 
-			return 0;
-		}
-	} else if (!is_add) {
-		dev_err(&hdev->pdev->dev,
-			"delete fail, rule %u is inexistent\n",
-			location);
-		return -EINVAL;
-	}
+static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev,
+				   struct ethtool_rx_flow_spec *fs,
+				   struct hclge_fd_rule *rule)
+{
+	rule->tuples.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
+	rule->tuples_mask.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
 
-	INIT_HLIST_NODE(&new_rule->rule_node);
+	rule->tuples.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
+	rule->tuples_mask.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
 
-	if (parent)
-		hlist_add_behind(&new_rule->rule_node, &parent->rule_node);
-	else
-		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
+	rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
+	rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
 
-	set_bit(location, hdev->fd_bmap);
-	hdev->hclge_fd_rule_num++;
-	hdev->fd_active_type = new_rule->rule_type;
+	rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
+	rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
 
-	return 0;
+	rule->tuples.ether_proto = ETH_P_IP;
+	rule->tuples_mask.ether_proto = 0xFFFF;
 }
 
-static int hclge_fd_get_tuple(struct hclge_dev *hdev,
-			      struct ethtool_rx_flow_spec *fs,
-			      struct hclge_fd_rule *rule)
+static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev,
+				      struct ethtool_rx_flow_spec *fs,
+				      struct hclge_fd_rule *rule, u8 ip_proto)
 {
-	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src,
+			  IPV6_SIZE);
 
-	switch (flow_type) {
-	case SCTP_V4_FLOW:
-	case TCP_V4_FLOW:
-	case UDP_V4_FLOW:
-		rule->tuples.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
+	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst,
+			  IPV6_SIZE);
 
-		rule->tuples.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
-
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
-		rule->tuples_mask.src_port =
-				be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
-
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
-		rule->tuples_mask.dst_port =
-				be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
+	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
+	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
 
-		rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
-		rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
+	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
+	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
 
-		rule->tuples.ether_proto = ETH_P_IP;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ether_proto = ETH_P_IPV6;
+	rule->tuples_mask.ether_proto = 0xFFFF;
 
-		break;
-	case IP_USER_FLOW:
-		rule->tuples.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
+	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
 
-		rule->tuples.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
+	rule->tuples.ip_proto = ip_proto;
+	rule->tuples_mask.ip_proto = 0xFF;
+}
 
-		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
-		rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
+static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev,
+				   struct ethtool_rx_flow_spec *fs,
+				   struct hclge_fd_rule *rule)
+{
+	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src,
+			  IPV6_SIZE);
 
-		rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
-		rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
+	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst,
+			  IPV6_SIZE);
 
-		rule->tuples.ether_proto = ETH_P_IP;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
+	rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
 
-		break;
-	case SCTP_V6_FLOW:
-	case TCP_V6_FLOW:
-	case UDP_V6_FLOW:
-		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
+	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
 
-		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
+	rule->tuples.ether_proto = ETH_P_IPV6;
+	rule->tuples_mask.ether_proto = 0xFFFF;
+}
 
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
-		rule->tuples_mask.src_port =
-				be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
+static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev,
+				     struct ethtool_rx_flow_spec *fs,
+				     struct hclge_fd_rule *rule)
+{
+	ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source);
+	ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
 
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
-		rule->tuples_mask.dst_port =
-				be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
+	ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest);
+	ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
 
-		rule->tuples.ether_proto = ETH_P_IPV6;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto);
+	rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto);
+}
 
+static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info,
+					struct hclge_fd_rule *rule)
+{
+	switch (info->layer) {
+	case HCLGE_FD_USER_DEF_L2:
+		rule->tuples.l2_user_def = info->data;
+		rule->tuples_mask.l2_user_def = info->data_mask;
 		break;
-	case IPV6_USER_FLOW:
-		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
-
-		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
-
-		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
-		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
-
-		rule->tuples.ether_proto = ETH_P_IPV6;
-		rule->tuples_mask.ether_proto = 0xFFFF;
-
+	case HCLGE_FD_USER_DEF_L3:
+		rule->tuples.l3_user_def = info->data;
+		rule->tuples_mask.l3_user_def = info->data_mask;
 		break;
-	case ETHER_FLOW:
-		ether_addr_copy(rule->tuples.src_mac,
-				fs->h_u.ether_spec.h_source);
-		ether_addr_copy(rule->tuples_mask.src_mac,
-				fs->m_u.ether_spec.h_source);
-
-		ether_addr_copy(rule->tuples.dst_mac,
-				fs->h_u.ether_spec.h_dest);
-		ether_addr_copy(rule->tuples_mask.dst_mac,
-				fs->m_u.ether_spec.h_dest);
-
-		rule->tuples.ether_proto =
-				be16_to_cpu(fs->h_u.ether_spec.h_proto);
-		rule->tuples_mask.ether_proto =
-				be16_to_cpu(fs->m_u.ether_spec.h_proto);
-
+	case HCLGE_FD_USER_DEF_L4:
+		rule->tuples.l4_user_def = (u32)info->data << 16;
+		rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16;
 		break;
 	default:
-		return -EOPNOTSUPP;
+		break;
 	}
 
+	rule->ep.user_def = *info;
+}
+
+static int hclge_fd_get_tuple(struct hclge_dev *hdev,
+			      struct ethtool_rx_flow_spec *fs,
+			      struct hclge_fd_rule *rule,
+			      struct hclge_fd_user_def_info *info)
+{
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+
 	switch (flow_type) {
 	case SCTP_V4_FLOW:
-	case SCTP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_SCTP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP);
 		break;
 	case TCP_V4_FLOW:
-	case TCP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_TCP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP);
 		break;
 	case UDP_V4_FLOW:
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP);
+		break;
+	case IP_USER_FLOW:
+		hclge_fd_get_ip4_tuple(hdev, fs, rule);
+		break;
+	case SCTP_V6_FLOW:
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP);
+		break;
+	case TCP_V6_FLOW:
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP);
+		break;
 	case UDP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_UDP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP);
 		break;
-	default:
+	case IPV6_USER_FLOW:
+		hclge_fd_get_ip6_tuple(hdev, fs, rule);
 		break;
+	case ETHER_FLOW:
+		hclge_fd_get_ether_tuple(hdev, fs, rule);
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
 	if (fs->flow_type & FLOW_EXT) {
 		rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci);
 		rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci);
+		hclge_fd_get_user_def_tuple(info, rule);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
@@ -5942,33 +6433,52 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	return 0;
 }
 
-/* make sure being called after lock up with fd_rule_lock */
 static int hclge_fd_config_rule(struct hclge_dev *hdev,
 				struct hclge_fd_rule *rule)
 {
 	int ret;
 
-	if (!rule) {
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		return ret;
+
+	return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+}
+
+static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
+				     struct hclge_fd_rule *rule)
+{
+	int ret;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	if (hdev->fd_active_type != rule->rule_type &&
+	    (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
+	     hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) {
 		dev_err(&hdev->pdev->dev,
-			"The flow director rule is NULL\n");
+			"mode conflict(new type %d, active type %d), please delete existent rules first\n",
+			rule->rule_type, hdev->fd_active_type);
+		spin_unlock_bh(&hdev->fd_rule_lock);
 		return -EINVAL;
 	}
 
-	/* it will never fail here, so needn't to check return value */
-	hclge_fd_update_rule_list(hdev, rule, rule->location, true);
+	ret = hclge_fd_check_user_def_refcnt(hdev, rule);
+	if (ret)
+		goto out;
 
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	ret = hclge_clear_arfs_rules(hdev);
 	if (ret)
-		goto clear_rule;
+		goto out;
 
-	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+	ret = hclge_fd_config_rule(hdev, rule);
 	if (ret)
-		goto clear_rule;
+		goto out;
 
-	return 0;
+	hclge_update_fd_list(hdev, HCLGE_FD_ACTIVE, rule->location, rule);
+	hdev->fd_active_type = rule->rule_type;
 
-clear_rule:
-	hclge_fd_update_rule_list(hdev, rule, rule->location, false);
+out:
+	spin_unlock_bh(&hdev->fd_rule_lock);
 	return ret;
 }
 
@@ -5980,11 +6490,48 @@ static bool hclge_is_cls_flower_active(struct hnae3_handle *handle)
 	return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE;
 }
 
+static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie,
+				      u16 *vport_id, u8 *action, u16 *queue_id)
+{
+	struct hclge_vport *vport = hdev->vport;
+
+	if (ring_cookie == RX_CLS_FLOW_DISC) {
+		*action = HCLGE_FD_ACTION_DROP_PACKET;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie);
+		u16 tqps;
+
+		if (vf > hdev->num_req_vfs) {
+			dev_err(&hdev->pdev->dev,
+				"Error: vf id (%u) > max vf num (%u)\n",
+				vf, hdev->num_req_vfs);
+			return -EINVAL;
+		}
+
+		*vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
+		tqps = hdev->vport[vf].nic.kinfo.num_tqps;
+
+		if (ring >= tqps) {
+			dev_err(&hdev->pdev->dev,
+				"Error: queue id (%u) > max tqp num (%u)\n",
+				ring, tqps - 1);
+			return -EINVAL;
+		}
+
+		*action = HCLGE_FD_ACTION_SELECT_QUEUE;
+		*queue_id = ring;
+	}
+
+	return 0;
+}
+
 static int hclge_add_fd_entry(struct hnae3_handle *handle,
 			      struct ethtool_rxnfc *cmd)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_user_def_info info;
 	u16 dst_vport_id = 0, q_index = 0;
 	struct ethtool_rx_flow_spec *fs;
 	struct hclge_fd_rule *rule;
@@ -6004,51 +6551,22 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 		return -EOPNOTSUPP;
 	}
 
-	if (hclge_is_cls_flower_active(handle)) {
-		dev_err(&hdev->pdev->dev,
-			"please delete all exist cls flower rules first\n");
-		return -EINVAL;
-	}
-
 	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
-	ret = hclge_fd_check_spec(hdev, fs, &unused);
+	ret = hclge_fd_check_spec(hdev, fs, &unused, &info);
 	if (ret)
 		return ret;
 
-	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
-		action = HCLGE_FD_ACTION_DROP_PACKET;
-	} else {
-		u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
-		u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
-		u16 tqps;
-
-		if (vf > hdev->num_req_vfs) {
-			dev_err(&hdev->pdev->dev,
-				"Error: vf id (%u) > max vf num (%u)\n",
-				vf, hdev->num_req_vfs);
-			return -EINVAL;
-		}
-
-		dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
-		tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps;
-
-		if (ring >= tqps) {
-			dev_err(&hdev->pdev->dev,
-				"Error: queue id (%u) > max tqp num (%u)\n",
-				ring, tqps - 1);
-			return -EINVAL;
-		}
-
-		action = HCLGE_FD_ACTION_SELECT_QUEUE;
-		q_index = ring;
-	}
+	ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id,
+					 &action, &q_index);
+	if (ret)
+		return ret;
 
 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
 	if (!rule)
 		return -ENOMEM;
 
-	ret = hclge_fd_get_tuple(hdev, fs, rule);
+	ret = hclge_fd_get_tuple(hdev, fs, rule, &info);
 	if (ret) {
 		kfree(rule);
 		return ret;
@@ -6062,15 +6580,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	rule->action = action;
 	rule->rule_type = HCLGE_FD_EP_ACTIVE;
 
-	/* to avoid rule conflict, when user configure rule by ethtool,
-	 * we need to clear all arfs rules
-	 */
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
-
-	ret = hclge_fd_config_rule(hdev, rule);
-
-	spin_unlock_bh(&hdev->fd_rule_lock);
+	ret = hclge_add_fd_entry_common(hdev, rule);
+	if (ret)
+		kfree(rule);
 
 	return ret;
 }
@@ -6091,32 +6603,30 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
 	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
 		return -EINVAL;
 
-	if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num ||
-	    !hclge_fd_rule_exist(hdev, fs->location)) {
+	spin_lock_bh(&hdev->fd_rule_lock);
+	if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
+	    !test_bit(fs->location, hdev->fd_bmap)) {
 		dev_err(&hdev->pdev->dev,
 			"Delete fail, rule %u is inexistent\n", fs->location);
+		spin_unlock_bh(&hdev->fd_rule_lock);
 		return -ENOENT;
 	}
 
 	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location,
 				   NULL, false);
 	if (ret)
-		return ret;
+		goto out;
 
-	spin_lock_bh(&hdev->fd_rule_lock);
-	ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
+	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL);
 
+out:
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
 	return ret;
 }
 
-/* make sure being called after lock up with fd_rule_lock */
-static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
-				     bool clear_list)
+static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev,
+					 bool clear_list)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
 	u16 location;
@@ -6124,6 +6634,8 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 	if (!hnae3_dev_fd_supported(hdev))
 		return;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
+
 	for_each_set_bit(location, hdev->fd_bmap,
 			 hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
 		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
@@ -6140,6 +6652,14 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 		bitmap_zero(hdev->fd_bmap,
 			    hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
 	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+}
+
+static void hclge_del_all_fd_entries(struct hclge_dev *hdev)
+{
+	hclge_clear_fd_rules_in_list(hdev, true);
+	hclge_fd_disable_user_def(hdev);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -6148,7 +6668,6 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
-	int ret;
 
 	/* Return ok here, because reset error handling will check this
 	 * return value. If error is returned here, the reset process will
@@ -6163,25 +6682,11 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 
 	spin_lock_bh(&hdev->fd_rule_lock);
 	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
-		ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
-		if (!ret)
-			ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
-
-		if (ret) {
-			dev_warn(&hdev->pdev->dev,
-				 "Restore rule %u failed, remove it\n",
-				 rule->location);
-			clear_bit(rule->location, hdev->fd_bmap);
-			hlist_del(&rule->rule_node);
-			kfree(rule);
-			hdev->hclge_fd_rule_num--;
-		}
+		if (rule->state == HCLGE_FD_ACTIVE)
+			rule->state = HCLGE_FD_TO_ADD;
 	}
-
-	if (hdev->hclge_fd_rule_num)
-		hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
-
 	spin_unlock_bh(&hdev->fd_rule_lock);
+	set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 
 	return 0;
 }
@@ -6269,6 +6774,10 @@ static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule,
 		cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip,
 				  IPV6_SIZE);
 
+	spec->tclass = rule->tuples.ip_tos;
+	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+
 	spec->psrc = cpu_to_be16(rule->tuples.src_port);
 	spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ?
 			0 : cpu_to_be16(rule->tuples_mask.src_port);
@@ -6296,6 +6805,10 @@ static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule,
 		cpu_to_be32_array(spec_mask->ip6dst,
 				  rule->tuples_mask.dst_ip, IPV6_SIZE);
 
+	spec->tclass = rule->tuples.ip_tos;
+	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+
 	spec->l4_proto = rule->tuples.ip_proto;
 	spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ?
 			0 : rule->tuples_mask.ip_proto;
@@ -6323,6 +6836,24 @@ static void hclge_fd_get_ether_info(struct hclge_fd_rule *rule,
 			0 : cpu_to_be16(rule->tuples_mask.ether_proto);
 }
 
+static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs,
+				       struct hclge_fd_rule *rule)
+{
+	if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) ==
+	    HCLGE_FD_TUPLE_USER_DEF_TUPLES) {
+		fs->h_ext.data[0] = 0;
+		fs->h_ext.data[1] = 0;
+		fs->m_ext.data[0] = 0;
+		fs->m_ext.data[1] = 0;
+	} else {
+		fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset);
+		fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data);
+		fs->m_ext.data[0] =
+				cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK);
+		fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask);
+	}
+}
+
 static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
 				  struct hclge_fd_rule *rule)
 {
@@ -6331,6 +6862,8 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
 		fs->m_ext.vlan_tci =
 				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
 				0 : cpu_to_be16(rule->tuples_mask.vlan_tag1);
+
+		hclge_fd_get_user_def_info(fs, rule);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
@@ -6442,6 +6975,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle,
 			return -EMSGSIZE;
 		}
 
+		if (rule->state == HCLGE_FD_TO_DEL)
+			continue;
+
 		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
@@ -6523,9 +7059,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 	struct hclge_fd_rule_tuples new_tuples = {};
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
-	u16 tmp_queue_id;
 	u16 bit_id;
-	int ret;
 
 	if (!hnae3_dev_fd_supported(hdev))
 		return -EOPNOTSUPP;
@@ -6561,34 +7095,20 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 			return -ENOMEM;
 		}
 
-		set_bit(bit_id, hdev->fd_bmap);
 		rule->location = bit_id;
 		rule->arfs.flow_id = flow_id;
 		rule->queue_id = queue_id;
 		hclge_fd_build_arfs_rule(&new_tuples, rule);
-		ret = hclge_fd_config_rule(hdev, rule);
-
-		spin_unlock_bh(&hdev->fd_rule_lock);
-
-		if (ret)
-			return ret;
-
-		return rule->location;
+		hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, rule->location,
+				     rule);
+		hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE;
+	} else if (rule->queue_id != queue_id) {
+		rule->queue_id = queue_id;
+		rule->state = HCLGE_FD_TO_ADD;
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+		hclge_task_schedule(hdev, 0);
 	}
-
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	if (rule->queue_id == queue_id)
-		return rule->location;
-
-	tmp_queue_id = rule->queue_id;
-	rule->queue_id = queue_id;
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
-	if (ret) {
-		rule->queue_id = tmp_queue_id;
-		return ret;
-	}
-
 	return rule->location;
 }
 
@@ -6598,7 +7118,6 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
 	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
-	HLIST_HEAD(del_list);
 
 	spin_lock_bh(&hdev->fd_rule_lock);
 	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
@@ -6606,33 +7125,50 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
 		return;
 	}
 	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (rule->state != HCLGE_FD_ACTIVE)
+			continue;
 		if (rps_may_expire_flow(handle->netdev, rule->queue_id,
 					rule->arfs.flow_id, rule->location)) {
-			hlist_del_init(&rule->rule_node);
-			hlist_add_head(&rule->rule_node, &del_list);
-			hdev->hclge_fd_rule_num--;
-			clear_bit(rule->location, hdev->fd_bmap);
+			rule->state = HCLGE_FD_TO_DEL;
+			set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 		}
 	}
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
-		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
-				     rule->location, NULL, false);
-		kfree(rule);
-	}
 #endif
 }
 
 /* make sure being called after lock up with fd_rule_lock */
-static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
+static int hclge_clear_arfs_rules(struct hclge_dev *hdev)
 {
 #ifdef CONFIG_RFS_ACCEL
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	int ret;
 
-	if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
-		hclge_del_all_fd_entries(handle, true);
+	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE)
+		return 0;
+
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		switch (rule->state) {
+		case HCLGE_FD_TO_DEL:
+		case HCLGE_FD_ACTIVE:
+			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+						   rule->location, NULL, false);
+			if (ret)
+				return ret;
+			fallthrough;
+		case HCLGE_FD_TO_ADD:
+			hclge_fd_dec_rule_cnt(hdev, rule->location);
+			hlist_del(&rule->rule_node);
+			kfree(rule);
+			break;
+		default:
+			break;
+		}
+	}
+	hclge_sync_fd_state(hdev);
+
+	return 0;
 #endif
 }
 
@@ -6815,12 +7351,6 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 	struct hclge_fd_rule *rule;
 	int ret;
 
-	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
-		dev_err(&hdev->pdev->dev,
-			"please remove all exist fd rules via ethtool first\n");
-		return -EINVAL;
-	}
-
 	ret = hclge_check_cls_flower(hdev, cls_flower, tc);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
@@ -6833,8 +7363,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 		return -ENOMEM;
 
 	ret = hclge_parse_cls_flower(hdev, cls_flower, rule);
-	if (ret)
-		goto err;
+	if (ret) {
+		kfree(rule);
+		return ret;
+	}
 
 	rule->action = HCLGE_FD_ACTION_SELECT_TC;
 	rule->cls_flower.tc = tc;
@@ -6843,22 +7375,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 	rule->cls_flower.cookie = cls_flower->cookie;
 	rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
 
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
-
-	ret = hclge_fd_config_rule(hdev, rule);
-
-	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to add cls flower rule, ret = %d\n", ret);
-		goto err;
-	}
+	ret = hclge_add_fd_entry_common(hdev, rule);
+	if (ret)
+		kfree(rule);
 
-	return 0;
-err:
-	kfree(rule);
 	return ret;
 }
 
@@ -6895,25 +7415,66 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle,
 	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location,
 				   NULL, false);
 	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to delete cls flower rule %u, ret = %d\n",
-			rule->location, ret);
 		spin_unlock_bh(&hdev->fd_rule_lock);
 		return ret;
 	}
 
-	ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to delete cls flower rule %u in list, ret = %d\n",
-			rule->location, ret);
-		spin_unlock_bh(&hdev->fd_rule_lock);
-		return ret;
+	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL);
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return 0;
+}
+
+static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist)
+{
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	int ret = 0;
+
+	if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state))
+		return;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	hlist_for_each_entry_safe(rule, node, hlist, rule_node) {
+		switch (rule->state) {
+		case HCLGE_FD_TO_ADD:
+			ret = hclge_fd_config_rule(hdev, rule);
+			if (ret)
+				goto out;
+			rule->state = HCLGE_FD_ACTIVE;
+			break;
+		case HCLGE_FD_TO_DEL:
+			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+						   rule->location, NULL, false);
+			if (ret)
+				goto out;
+			hclge_fd_dec_rule_cnt(hdev, rule->location);
+			hclge_fd_free_node(hdev, rule);
+			break;
+		default:
+			break;
+		}
 	}
 
+out:
+	if (ret)
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+
 	spin_unlock_bh(&hdev->fd_rule_lock);
+}
 
-	return 0;
+static void hclge_sync_fd_table(struct hclge_dev *hdev)
+{
+	if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) {
+		bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
+
+		hclge_clear_fd_rules_in_list(hdev, clear_list);
+	}
+
+	hclge_sync_fd_user_def_cfg(hdev, false);
+
+	hclge_sync_fd_list(hdev, &hdev->fd_rule_list);
 }
 
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
@@ -6953,18 +7514,15 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	bool clear;
 
 	hdev->fd_en = enable;
-	clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
 
-	if (!enable) {
-		spin_lock_bh(&hdev->fd_rule_lock);
-		hclge_del_all_fd_entries(handle, clear);
-		spin_unlock_bh(&hdev->fd_rule_lock);
-	} else {
+	if (!enable)
+		set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state);
+	else
 		hclge_restore_fd_entries(handle);
-	}
+
+	hclge_task_schedule(hdev, 0);
 }
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
@@ -7125,19 +7683,19 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
 	return ret;
 }
 
-static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
+static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en,
 				     enum hnae3_loop loop_mode)
 {
-#define HCLGE_SERDES_RETRY_MS	10
-#define HCLGE_SERDES_RETRY_NUM	100
+#define HCLGE_COMMON_LB_RETRY_MS	10
+#define HCLGE_COMMON_LB_RETRY_NUM	100
 
-	struct hclge_serdes_lb_cmd *req;
+	struct hclge_common_lb_cmd *req;
 	struct hclge_desc desc;
 	int ret, i = 0;
 	u8 loop_mode_b;
 
-	req = (struct hclge_serdes_lb_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false);
+	req = (struct hclge_common_lb_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false);
 
 	switch (loop_mode) {
 	case HNAE3_LOOP_SERIAL_SERDES:
@@ -7146,9 +7704,12 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
 	case HNAE3_LOOP_PARALLEL_SERDES:
 		loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
 		break;
+	case HNAE3_LOOP_PHY:
+		loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B;
+		break;
 	default:
 		dev_err(&hdev->pdev->dev,
-			"unsupported serdes loopback mode %d\n", loop_mode);
+			"unsupported common loopback mode %d\n", loop_mode);
 		return -ENOTSUPP;
 	}
 
@@ -7162,39 +7723,39 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"serdes loopback set fail, ret = %d\n", ret);
+			"common loopback set fail, ret = %d\n", ret);
 		return ret;
 	}
 
 	do {
-		msleep(HCLGE_SERDES_RETRY_MS);
-		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK,
+		msleep(HCLGE_COMMON_LB_RETRY_MS);
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK,
 					   true);
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"serdes loopback get, ret = %d\n", ret);
+				"common loopback get, ret = %d\n", ret);
 			return ret;
 		}
-	} while (++i < HCLGE_SERDES_RETRY_NUM &&
-		 !(req->result & HCLGE_CMD_SERDES_DONE_B));
+	} while (++i < HCLGE_COMMON_LB_RETRY_NUM &&
+		 !(req->result & HCLGE_CMD_COMMON_LB_DONE_B));
 
-	if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) {
-		dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n");
+	if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) {
+		dev_err(&hdev->pdev->dev, "common loopback set timeout\n");
 		return -EBUSY;
-	} else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) {
-		dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n");
+	} else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) {
+		dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n");
 		return -EIO;
 	}
 	return ret;
 }
 
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
+static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en,
 				     enum hnae3_loop loop_mode)
 {
 	int ret;
 
-	ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode);
+	ret = hclge_cfg_common_loopback(hdev, en, loop_mode);
 	if (ret)
 		return ret;
 
@@ -7243,8 +7804,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en)
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	int ret;
 
-	if (!phydev)
+	if (!phydev) {
+		if (hnae3_dev_phy_imp_supported(hdev))
+			return hclge_set_common_loopback(hdev, en,
+							 HNAE3_LOOP_PHY);
 		return -ENOTSUPP;
+	}
 
 	if (en)
 		ret = hclge_enable_phy_loopback(hdev, phydev);
@@ -7315,7 +7880,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
 		break;
 	case HNAE3_LOOP_SERIAL_SERDES:
 	case HNAE3_LOOP_PARALLEL_SERDES:
-		ret = hclge_set_serdes_loopback(hdev, en, loop_mode);
+		ret = hclge_set_common_loopback(hdev, en, loop_mode);
 		break;
 	case HNAE3_LOOP_PHY:
 		ret = hclge_set_phy_loopback(hdev, en);
@@ -7348,11 +7913,11 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
+	ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
 	if (ret)
 		return ret;
 
-	return hclge_cfg_serdes_loopback(hdev, false,
+	return hclge_cfg_common_loopback(hdev, false,
 					 HNAE3_LOOP_PARALLEL_SERDES);
 }
 
@@ -7428,7 +7993,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
+	hclge_clear_arfs_rules(hdev);
 	spin_unlock_bh(&hdev->fd_rule_lock);
 
 	/* If it is not PF reset, the firmware will disable the MAC,
@@ -8759,6 +9324,29 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
 	return 0;
 }
 
+static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd)
+{
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = hdev->hw.mac.phy_addr;
+		/* this command reads phy id and register at the same time */
+		fallthrough;
+	case SIOCGMIIREG:
+		data->val_out = hclge_read_phy_reg(hdev, data->reg_num);
+		return 0;
+
+	case SIOCSMIIREG:
+		return hclge_write_phy_reg(hdev, data->reg_num, data->val_in);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 			  int cmd)
 {
@@ -8766,7 +9354,7 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 	struct hclge_dev *hdev = vport->back;
 
 	if (!hdev->hw.mac.phydev)
-		return -EOPNOTSUPP;
+		return hclge_mii_ioctl(hdev, ifr, cmd);
 
 	return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
 }
@@ -9441,7 +10029,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev)
 	hclge_restore_mac_table_common(vport);
 	hclge_restore_vport_vlan_table(vport);
 	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
-
+	set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 	hclge_restore_fd_entries(handle);
 }
 
@@ -10013,9 +10601,10 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct phy_device *phydev = hdev->hw.mac.phydev;
+	u8 media_type = hdev->hw.mac.media_type;
 
-	*auto_neg = phydev ? hclge_get_autoneg(handle) : 0;
+	*auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ?
+		    hclge_get_autoneg(handle) : 0;
 
 	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
 		*rx_en = 0;
@@ -10061,7 +10650,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	u32 fc_autoneg;
 
-	if (phydev) {
+	if (phydev || hnae3_dev_phy_imp_supported(hdev)) {
 		fc_autoneg = hclge_get_autoneg(handle);
 		if (auto_neg != fc_autoneg) {
 			dev_info(&hdev->pdev->dev,
@@ -10080,7 +10669,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
 
 	hclge_record_user_pauseparam(hdev, rx_en, tx_en);
 
-	if (!auto_neg)
+	if (!auto_neg || hnae3_dev_phy_imp_supported(hdev))
 		return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
 
 	if (phydev)
@@ -10638,7 +11227,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (ret)
 		goto err_msi_irq_uninit;
 
-	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER &&
+	    !hnae3_dev_phy_imp_supported(hdev)) {
 		ret = hclge_mac_mdio_config(hdev);
 		if (ret)
 			goto err_msi_irq_uninit;
@@ -11031,6 +11621,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_tp_port_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to init tp port, ret = %d\n",
+			ret);
+		return ret;
+	}
+
 	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
 	if (ret) {
 		dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@ -11121,6 +11718,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
 	hclge_uninit_mac_table(hdev);
+	hclge_del_all_fd_entries(hdev);
 
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
@@ -11944,7 +12542,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_link_mode = hclge_get_link_mode,
 	.add_fd_entry = hclge_add_fd_entry,
 	.del_fd_entry = hclge_del_fd_entry,
-	.del_all_fd_entries = hclge_del_all_fd_entries,
 	.get_fd_rule_cnt = hclge_get_fd_rule_cnt,
 	.get_fd_rule_info = hclge_get_fd_rule_info,
 	.get_fd_all_rules = hclge_get_all_rules,
@@ -11972,6 +12569,8 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.add_cls_flower = hclge_add_cls_flower,
 	.del_cls_flower = hclge_del_cls_flower,
 	.cls_flower_active = hclge_is_cls_flower_active,
+	.get_phy_link_ksettings = hclge_get_phy_link_ksettings,
+	.set_phy_link_ksettings = hclge_set_phy_link_ksettings,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 19d7f28773f3..97e77e2f7539 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -223,6 +223,9 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_LINK_UPDATING,
 	HCLGE_STATE_PROMISC_CHANGED,
 	HCLGE_STATE_RST_FAIL,
+	HCLGE_STATE_FD_TBL_CHANGED,
+	HCLGE_STATE_FD_CLEAR_ALL,
+	HCLGE_STATE_FD_USER_DEF_CHANGED,
 	HCLGE_STATE_MAX
 };
 
@@ -536,6 +539,9 @@ enum HCLGE_FD_TUPLE {
 	MAX_TUPLE,
 };
 
+#define HCLGE_FD_TUPLE_USER_DEF_TUPLES \
+	(BIT(INNER_L2_RSV) | BIT(INNER_L3_RSV) | BIT(INNER_L4_RSV))
+
 enum HCLGE_FD_META_DATA {
 	PACKET_TYPE_ID,
 	IP_FRAGEMENT,
@@ -548,9 +554,21 @@ enum HCLGE_FD_META_DATA {
 	MAX_META_DATA,
 };
 
+enum HCLGE_FD_KEY_OPT {
+	KEY_OPT_U8,
+	KEY_OPT_LE16,
+	KEY_OPT_LE32,
+	KEY_OPT_MAC,
+	KEY_OPT_IP,
+	KEY_OPT_VNI,
+};
+
 struct key_info {
 	u8 key_type;
 	u8 key_length; /* use bit as unit */
+	enum HCLGE_FD_KEY_OPT key_opt;
+	int offset;
+	int moffset;
 };
 
 #define MAX_KEY_LENGTH	400
@@ -558,6 +576,11 @@ struct key_info {
 #define MAX_KEY_BYTES	(MAX_KEY_DWORDS * 4)
 #define MAX_META_DATA_LENGTH	32
 
+#define HCLGE_FD_MAX_USER_DEF_OFFSET	9000
+#define HCLGE_FD_USER_DEF_DATA		GENMASK(15, 0)
+#define HCLGE_FD_USER_DEF_OFFSET	GENMASK(15, 0)
+#define HCLGE_FD_USER_DEF_OFFSET_UNMASK	GENMASK(15, 0)
+
 /* assigned by firmware, the real filter number for each pf may be less */
 #define MAX_FD_FILTER_NUM	4096
 #define HCLGE_ARFS_EXPIRE_INTERVAL	5UL
@@ -580,6 +603,33 @@ enum HCLGE_FD_ACTION {
 	HCLGE_FD_ACTION_SELECT_TC,
 };
 
+enum HCLGE_FD_NODE_STATE {
+	HCLGE_FD_TO_ADD,
+	HCLGE_FD_TO_DEL,
+	HCLGE_FD_ACTIVE,
+	HCLGE_FD_DELETED,
+};
+
+enum HCLGE_FD_USER_DEF_LAYER {
+	HCLGE_FD_USER_DEF_NONE,
+	HCLGE_FD_USER_DEF_L2,
+	HCLGE_FD_USER_DEF_L3,
+	HCLGE_FD_USER_DEF_L4,
+};
+
+#define HCLGE_FD_USER_DEF_LAYER_NUM 3
+struct hclge_fd_user_def_cfg {
+	u16 ref_cnt;
+	u16 offset;
+};
+
+struct hclge_fd_user_def_info {
+	enum HCLGE_FD_USER_DEF_LAYER layer;
+	u16 data;
+	u16 data_mask;
+	u16 offset;
+};
+
 struct hclge_fd_key_cfg {
 	u8 key_sel;
 	u8 inner_sipv6_word_en;
@@ -596,6 +646,7 @@ struct hclge_fd_cfg {
 	u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */
 	u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */
 	struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM];
+	struct hclge_fd_user_def_cfg user_def_cfg[HCLGE_FD_USER_DEF_LAYER_NUM];
 };
 
 #define IPV4_INDEX	3
@@ -612,6 +663,9 @@ struct hclge_fd_rule_tuples {
 	u16 dst_port;
 	u16 vlan_tag1;
 	u16 ether_proto;
+	u16 l2_user_def;
+	u16 l3_user_def;
+	u32 l4_user_def;
 	u8 ip_tos;
 	u8 ip_proto;
 };
@@ -630,11 +684,15 @@ struct hclge_fd_rule {
 		struct {
 			u16 flow_id; /* only used for arfs */
 		} arfs;
+		struct {
+			struct hclge_fd_user_def_info user_def;
+		} ep;
 	};
 	u16 queue_id;
 	u16 vf_id;
 	u16 location;
 	enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type;
+	enum HCLGE_FD_NODE_STATE state;
 	u8 action;
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index e89820702540..08e88d9422cd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -268,3 +268,42 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev)
 
 	phy_stop(phydev);
 }
+
+u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr)
+{
+	struct hclge_phy_reg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, true);
+
+	req = (struct hclge_phy_reg_cmd *)desc.data;
+	req->reg_addr = cpu_to_le16(reg_addr);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to read phy reg, ret = %d.\n", ret);
+
+	return le16_to_cpu(req->reg_val);
+}
+
+int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val)
+{
+	struct hclge_phy_reg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, false);
+
+	req = (struct hclge_phy_reg_cmd *)desc.data;
+	req->reg_addr = cpu_to_le16(reg_addr);
+	req->reg_val = cpu_to_le16(val);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to write phy reg, ret = %d.\n", ret);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
index dd9a1218a7b0..fd0e20190b90 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -9,5 +9,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle);
 void hclge_mac_disconnect_phy(struct hnae3_handle *handle);
 void hclge_mac_start_phy(struct hclge_dev *hdev);
 void hclge_mac_stop_phy(struct hclge_dev *hdev);
+u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr);
+int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val);
 
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 883d0d7c6858..3e54017a2a5b 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -279,7 +279,7 @@ static int hns_mdio_write(struct mii_bus *bus,
 static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
 	int ret;
-	u16 reg_val = 0;
+	u16 reg_val;
 	u8 devad = ((regnum >> 16) & 0x1f);
 	u8 is_c45 = !!(regnum & MII_ADDR_C45);
 	u16 reg = (u16)(regnum & 0xffff);
@@ -420,7 +420,7 @@ static int hns_mdio_probe(struct platform_device *pdev)
 {
 	struct hns_mdio_device *mdio_dev;
 	struct mii_bus *new_bus;
-	int ret = -ENODEV;
+	int ret;
 
 	if (!pdev) {
 		dev_err(NULL, "pdev is NULL!\r\n");
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index c340d9acba80..dc024ef521c0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -34,7 +34,7 @@
 #include "hinic_rx.h"
 #include "hinic_dev.h"
 
-#define SET_LINK_STR_MAX_LEN	128
+#define SET_LINK_STR_MAX_LEN	16
 
 #define GET_SUPPORTED_MODE	0
 #define GET_ADVERTISED_MODE	1
@@ -462,24 +462,19 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev,
 {
 	struct hinic_link_ksettings_info settings = {0};
 	char set_link_str[SET_LINK_STR_MAX_LEN] = {0};
+	const char *autoneg_str;
 	struct net_device *netdev = nic_dev->netdev;
 	enum nic_speed_level speed_level = 0;
 	int err;
 
-	err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN, "%s",
-		       (set_settings & HILINK_LINK_SET_AUTONEG) ?
-		       (autoneg ? "autong enable " : "autong disable ") : "");
-	if (err < 0 || err >= SET_LINK_STR_MAX_LEN) {
-		netif_err(nic_dev, drv, netdev, "Failed to snprintf link state, function return(%d) and dest_len(%d)\n",
-			  err, SET_LINK_STR_MAX_LEN);
-		return -EFAULT;
-	}
+	autoneg_str = (set_settings & HILINK_LINK_SET_AUTONEG) ?
+		      (autoneg ? "autong enable " : "autong disable ") : "";
 
 	if (set_settings & HILINK_LINK_SET_SPEED) {
 		speed_level = hinic_ethtool_to_hw_speed_level(speed);
 		err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN,
-			       "%sspeed %d ", set_link_str, speed);
-		if (err <= 0 || err >= SET_LINK_STR_MAX_LEN) {
+			       "speed %d ", speed);
+		if (err >= SET_LINK_STR_MAX_LEN) {
 			netif_err(nic_dev, drv, netdev, "Failed to snprintf link speed, function return(%d) and dest_len(%d)\n",
 				  err, SET_LINK_STR_MAX_LEN);
 			return -EFAULT;
@@ -494,11 +489,11 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev,
 	err = hinic_set_link_settings(nic_dev->hwdev, &settings);
 	if (err != HINIC_MGMT_CMD_UNSUPPORTED) {
 		if (err)
-			netif_err(nic_dev, drv, netdev, "Set %s failed\n",
-				  set_link_str);
+			netif_err(nic_dev, drv, netdev, "Set %s%sfailed\n",
+				  autoneg_str, set_link_str);
 		else
-			netif_info(nic_dev, drv, netdev, "Set %s successfully\n",
-				   set_link_str);
+			netif_info(nic_dev, drv, netdev, "Set %s%ssuccessfully\n",
+				   autoneg_str, set_link_str);
 
 		return err;
 	}
@@ -543,8 +538,8 @@ static void hinic_get_drvinfo(struct net_device *netdev,
 	struct hinic_hwif *hwif = hwdev->hwif;
 	int err;
 
-	strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
-	strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
+	strscpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
+	strscpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
 
 	err = hinic_get_mgmt_version(nic_dev, mgmt_ver);
 	if (err)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index 4e4029d5c8e1..06586173add7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -629,10 +629,8 @@ static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain,
 
 	cmd_vaddr = dma_alloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
 				       &cmd_paddr, GFP_KERNEL);
-	if (!cmd_vaddr) {
-		dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n");
+	if (!cmd_vaddr)
 		return -ENOMEM;
-	}
 
 	cell_ctxt = &chain->cell_ctxt[cell_idx];
 
@@ -679,10 +677,8 @@ static int api_cmd_create_cell(struct hinic_api_cmd_chain *chain,
 
 	node = dma_alloc_coherent(&pdev->dev, chain->cell_size, &node_paddr,
 				  GFP_KERNEL);
-	if (!node) {
-		dev_err(&pdev->dev, "Failed to allocate dma API CMD cell\n");
+	if (!node)
 		return -ENOMEM;
-	}
 
 	node->read.hw_wb_resp_paddr = 0;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index efbaed389440..cab38ff0713c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -334,7 +334,7 @@ static void set_dma_attr(struct hinic_hwif *hwif, u32 entry_idx,
 }
 
 /**
- * dma_attr_table_init - initialize the the default dma attributes
+ * dma_attr_table_init - initialize the default dma attributes
  * @hwif: the HW interface of a pci function device
  **/
 static void dma_attr_init(struct hinic_hwif *hwif)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 819fa13034c0..817173f1fbb7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -440,18 +440,14 @@ static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
 				  struct hinic_recv_msg *recv_msg)
 {
 	struct hinic_mgmt_msg_handle_work *mgmt_work = NULL;
-	struct pci_dev *pdev = pf_to_mgmt->hwif->pdev;
 
 	mgmt_work = kzalloc(sizeof(*mgmt_work), GFP_KERNEL);
-	if (!mgmt_work) {
-		dev_err(&pdev->dev, "Allocate mgmt work memory failed\n");
+	if (!mgmt_work)
 		return;
-	}
 
 	if (recv_msg->msg_len) {
 		mgmt_work->msg = kzalloc(recv_msg->msg_len, GFP_KERNEL);
 		if (!mgmt_work->msg) {
-			dev_err(&pdev->dev, "Allocate mgmt msg memory failed\n");
 			kfree(mgmt_work);
 			return;
 		}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index fcf7bfe4aa47..dcba4d009bad 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -414,7 +414,6 @@ int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
 	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
 					      &rq->pi_dma_addr, GFP_KERNEL);
 	if (!rq->pi_virt_addr) {
-		dev_err(&pdev->dev, "Failed to allocate PI address\n");
 		err = -ENOMEM;
 		goto err_pi_virt;
 	}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 070a7cc6392e..cce08647b9b2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -137,10 +137,8 @@ static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
 	int err;
 
 	skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
-	if (!skb) {
-		netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
+	if (!skb)
 		return NULL;
-	}
 
 	addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
 			      DMA_FROM_DEVICE);
@@ -212,10 +210,8 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 
 	for (i = 0; i < free_wqebbs; i++) {
 		skb = rx_alloc_skb(rxq, &dma_addr);
-		if (!skb) {
-			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
+		if (!skb)
 			goto skb_out;
-		}
 
 		hinic_set_sge(&sge, dma_addr, skb->len);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 8da7d46363b2..710c4ff7bc0e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -377,6 +377,7 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
 		} else if (ip.v4->version == 6) {
 			unsigned char *exthdr;
 			__be16 frag_off;
+
 			l3_type = IPV6_PKT;
 			tunnel_type = TUNNEL_UDP_CSUM;
 			exthdr = ip.hdr + sizeof(*ip.v6);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 4c0c9433bd60..19cf36360933 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1183,6 +1183,7 @@ static s32 e1000_copper_link_igp_setup(struct e1000_hw *hw)
 			break;
 		case e1000_ms_auto:
 			phy_data &= ~CR_1000T_MS_ENABLE;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 0ac8d79a7987..590ad110d383 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2745,7 +2745,7 @@ release:
 }
 
 /**
- *  e1000_k1_gig_workaround_lv - K1 Si workaround
+ *  e1000_k1_workaround_lv - K1 Si workaround
  *  @hw:   pointer to the HW structure
  *
  *  Workaround to set the K1 beacon duration for 82579 parts in 10Mbps
@@ -5220,7 +5220,7 @@ void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
 }
 
 /**
- *  e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3
+ *  e1000e_igp3_phy_powerdown_workaround_ich8lan - Power down workaround on D3
  *  @hw: pointer to the HW structure
  *
  *  Workaround for 82566 power-down on D3 entry:
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a0948002ddf8..88e9035b75cf 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -25,6 +25,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/aer.h>
 #include <linux/prefetch.h>
+#include <linux/suspend.h>
 
 #include "e1000.h"
 
@@ -5990,7 +5991,7 @@ static void e1000_reset_task(struct work_struct *work)
 }
 
 /**
- * e1000_get_stats64 - Get System Network Statistics
+ * e1000e_get_stats64 - Get System Network Statistics
  * @netdev: network interface device structure
  * @stats: rtnl_link_stats64 pointer
  *
@@ -6163,7 +6164,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
 }
 
 /**
- * e1000e_hwtstamp_ioctl - control hardware time stamping
+ * e1000e_hwtstamp_set - control hardware time stamping
  * @netdev: network interface device structure
  * @ifr: interface request
  *
@@ -6821,7 +6822,7 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
 }
 
 /**
- * e1000e_disable_aspm_locked   Disable ASPM states.
+ * e1000e_disable_aspm_locked - Disable ASPM states.
  * @pdev: pointer to PCI device struct
  * @state: bit-mask of ASPM states to disable
  *
@@ -6922,6 +6923,12 @@ static int __e1000_resume(struct pci_dev *pdev)
 	return 0;
 }
 
+static __maybe_unused int e1000e_pm_prepare(struct device *dev)
+{
+	return pm_runtime_suspended(dev) &&
+		pm_suspend_via_firmware();
+}
+
 static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 {
 	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
@@ -7630,9 +7637,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	e1000_print_device_info(adapter);
 
-	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
 
-	if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp)
+	if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp)
 		pm_runtime_put_noidle(&pdev->dev);
 
 	return 0;
@@ -7855,6 +7862,7 @@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
 
 static const struct dev_pm_ops e1000_pm_ops = {
 #ifdef CONFIG_PM_SLEEP
+	.prepare	= e1000e_pm_prepare,
 	.suspend	= e1000e_pm_suspend,
 	.resume		= e1000e_pm_resume,
 	.freeze		= e1000e_pm_freeze,
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index bdd9dc163f15..1db35b2c7750 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -371,7 +371,7 @@ s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
 }
 
 /**
- *  e1000e_write_phy_reg_igp - Write igp PHY register
+ *  __e1000e_write_phy_reg_igp - Write igp PHY register
  *  @hw: pointer to the HW structure
  *  @offset: register offset to write to
  *  @data: data to write at register offset
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index f3f671311855..9e79d672f4f1 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -142,7 +142,7 @@ static int e1000e_phc_get_syncdevicetime(ktime_t *device,
 }
 
 /**
- * e1000e_phc_getsynctime - Reads the current system/device cross timestamp
+ * e1000e_phc_getcrosststamp - Reads the current system/device cross timestamp
  * @ptp: ptp clock structure
  * @xtstamp: structure containing timestamp
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index c45315472245..86397c564dfc 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -105,7 +105,7 @@ static int fm10k_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
 }
 
 /**
- * fm10k_dcbnl_ieee_getdcbx - get the DCBX configuration for the device
+ * fm10k_dcbnl_getdcbx - get the DCBX configuration for the device
  * @dev: netdev interface for the device
  *
  * Returns that we support only IEEE DCB for this interface
@@ -116,7 +116,7 @@ static u8 fm10k_dcbnl_getdcbx(struct net_device __always_unused *dev)
 }
 
 /**
- * fm10k_dcbnl_ieee_setdcbx - get the DCBX configuration for the device
+ * fm10k_dcbnl_setdcbx - get the DCBX configuration for the device
  * @dev: netdev interface for the device
  * @mode: new mode for this device
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 1d27b2fb23af..5c77054d67c6 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -185,7 +185,7 @@ void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector)
 }
 
 /**
- * fm10k_dbg_free_q_vector_dir - setup debugfs for the q_vectors
+ * fm10k_dbg_q_vector_exit - setup debugfs for the q_vectors
  * @q_vector: q_vector to allocate directories for
  **/
 void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 247f44f4cb30..3362f26d7f99 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1774,7 +1774,7 @@ static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
 }
 
 /**
- * f10k_reset_msix_capability - reset MSI-X capability
+ * fm10k_reset_msix_capability - reset MSI-X capability
  * @interface: board private structure to initialize
  *
  * Reset the MSI-X capability back to its starting state
@@ -1787,7 +1787,7 @@ static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
 }
 
 /**
- * f10k_init_msix_capability - configure MSI-X capability
+ * fm10k_init_msix_capability - configure MSI-X capability
  * @interface: board private structure to initialize
  *
  * Attempt to configure the interrupts using the best available
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 8e2e92bf3cd4..30ca9ee1900b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -692,7 +692,7 @@ static bool fm10k_mbx_tx_complete(struct fm10k_mbx_info *mbx)
 }
 
 /**
- *  fm10k_mbx_deqeueue_rx - Dequeues the message from the head in the Rx FIFO
+ *  fm10k_mbx_dequeue_rx - Dequeues the message from the head in the Rx FIFO
  *  @hw: pointer to hardware structure
  *  @mbx: pointer to mailbox
  *
@@ -1039,6 +1039,7 @@ static s32 fm10k_mbx_create_reply(struct fm10k_hw *hw,
 	case FM10K_STATE_CLOSED:
 		/* generate new header based on data */
 		fm10k_mbx_create_disconnect_hdr(mbx);
+		break;
 	default:
 		break;
 	}
@@ -2017,6 +2018,7 @@ static s32 fm10k_sm_mbx_process_reset(struct fm10k_hw *hw,
 	case FM10K_STATE_CONNECT:
 		/* Update remote value to match local value */
 		mbx->remote = mbx->local;
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index c0780c3624c8..af1b0cde3670 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1417,7 +1417,7 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results,
 }
 
 /**
- *  fm10k_update_stats_hw_pf - Updates hardware related statistics of PF
+ *  fm10k_update_hw_stats_pf - Updates hardware related statistics of PF
  *  @hw: pointer to hardware structure
  *  @stats: pointer to the stats structure to update
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index ec19e18305ec..41b813fe07a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -2332,7 +2332,7 @@ i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
 }
 
 /**
- * i40e_get_vsi_params - get VSI configuration info
+ * i40e_aq_get_vsi_params - get VSI configuration info
  * @hw: pointer to the hw struct
  * @vsi_ctx: pointer to a vsi context struct
  * @cmd_details: pointer to command details structure or NULL
@@ -2586,7 +2586,7 @@ i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
 }
 
 /**
- * i40e_updatelink_status - update status of the HW network link
+ * i40e_update_link_info - update status of the HW network link
  * @hw: pointer to the hw struct
  **/
 noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
@@ -5059,7 +5059,7 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num)
 }
 
 /**
- * i40e_blink_phy_led
+ * i40e_blink_phy_link_led
  * @hw: pointer to the HW structure
  * @time: time how long led will blinks in secs
  * @interval: gap between LED on and off in msecs
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 243b0d2b7b72..673f341f4c0c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -234,7 +234,7 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv,
 }
 
 /**
- * i40e_parse_ieee_etsrec_tlv
+ * i40e_parse_ieee_tlv
  * @tlv: IEEE 802.1Qaz TLV
  * @dcbcfg: Local store to update ETS REC data
  *
@@ -1588,7 +1588,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
 }
 
 /**
- * i40e_dcb_hw_rx_ets_bw_config
+ * i40e_dcb_hw_rx_up2tc_config
  * @hw: pointer to the hw struct
  * @prio_tc: priority to tc assignment indexed by priority
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 0345132a0ef5..e32c61909b31 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -392,7 +392,7 @@ static void i40e_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
 }
 
 /**
- * i40e_dcbnl_set_pg_tc_cfg_tx - Set CEE PG Tx BW config
+ * i40e_dcbnl_set_pg_bwg_cfg_tx - Set CEE PG Tx BW config
  * @netdev: the corresponding netdev
  * @pgid: the corresponding traffic class
  * @bw_pct: the BW percentage for the specified traffic class
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
index 5e08f100c413..e1069ae658ad 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
@@ -77,7 +77,7 @@ static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new,
 }
 
 /**
- * i40e_ddp_does_profiles_ - checks if DDP overlaps with existing one.
+ * i40e_ddp_does_profile_overlap - checks if DDP overlaps with existing one.
  * @hw: HW data structure
  * @pinfo: DDP profile information structure
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index d7c13ca9be7d..e8230da29f05 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -651,7 +651,7 @@ static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf)
 }
 
 /**
- * i40e_dbg_dump_stats - handles dump stats write into command datum
+ * i40e_dbg_dump_eth_stats - handles dump stats write into command datum
  * @pf: the i40e_pf created in command write
  * @estats: the eth stats structure to be dumped
  **/
@@ -1638,7 +1638,7 @@ static const struct file_operations i40e_dbg_command_fops = {
 static char i40e_dbg_netdev_ops_buf[256] = "";
 
 /**
- * i40e_dbg_netdev_ops - read for netdev_ops datum
+ * i40e_dbg_netdev_ops_read - read for netdev_ops datum
  * @filp: the opened file
  * @buffer: where to write the data for the user to read
  * @count: the size of the user's buffer
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c70dec65a572..c4c167650b6b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -212,7 +212,7 @@ static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
 }
 
 /**
- * 40e_add_stat_strings - copy stat strings into ethtool buffer
+ * i40e_add_stat_strings - copy stat strings into ethtool buffer
  * @p: ethtool supplied buffer
  * @stats: stat definitions array
  *
@@ -2368,21 +2368,15 @@ static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
-	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
-			 i40e_gstrings_priv_flags[i].flag_string);
-		p += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string);
 	if (pf->hw.pf_id != 0)
 		return;
-	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
-			 i40e_gl_gstrings_priv_flags[i].flag_string);
-		p += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string);
 }
 
 static void i40e_get_strings(struct net_device *netdev, u32 stringset,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index a3da422ab05b..d6e92ecddfbd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -511,7 +511,7 @@ configure_lan_hmc_out:
 }
 
 /**
- * i40e_delete_hmc_object - remove hmc objects
+ * i40e_delete_lan_hmc_object - remove hmc objects
  * @hw: pointer to the HW structure
  * @info: pointer to i40e_hmc_delete_obj_info struct
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 17f3b800640e..0f84ed0143e4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2023,7 +2023,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
 }
 
 /**
- * i40e_next_entry - Get the next non-broadcast filter from a list
+ * i40e_next_filter - Get the next non-broadcast filter from a list
  * @next: pointer to filter in list
  *
  * Returns the next non-broadcast filter in the list. Required so that we
@@ -5204,7 +5204,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 }
 
 /**
- * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes
+ * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
  * @pf: PF being queried
  *
  * Return a bitmap for enabled traffic classes for this PF.
@@ -9467,7 +9467,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 }
 
 /**
- * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed
+ * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
  * @pf: board private structure
  **/
 u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 7164f4ad8120..fe6dca846028 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -4,7 +4,7 @@
 #include "i40e_prototype.h"
 
 /**
- * i40e_init_nvm_ops - Initialize NVM function pointers
+ * i40e_init_nvm - Initialize NVM function pointers
  * @hw: pointer to the HW structure
  *
  * Setup the function pointers and the NVM info structure. Should be called
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 7a879614ca55..f1f6fc3744e9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -216,7 +216,7 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
 }
 
 /**
- * i40e_ptp_update_latch_events - Read I40E_PRTTSYN_STAT_1 and latch events
+ * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events
  * @pf: the PF data structure
  *
  * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5747a99122fb..8b5820921377 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3333,7 +3333,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 }
 
 /**
- * i40e_create_tx_ctx Build the Tx context descriptor
+ * i40e_create_tx_ctx - Build the Tx context descriptor
  * @tx_ring:  ring to create the descriptor on
  * @cd_type_cmd_tso_mss: Quad Word 1
  * @cd_tunneling: Quad Word 0 - bits 0-31
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index fc32c5019b0f..d89c22347d9d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -160,6 +160,13 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -167,10 +174,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -625,7 +628,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
 }
 
 /**
- * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
+ * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
  * @tx_ring: XDP Tx ring
  **/
 void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
index c997063ed728..121e194ee734 100644
--- a/drivers/net/ethernet/intel/iavf/Makefile
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -11,5 +11,5 @@ subdir-ccflags-y += -I$(src)
 
 obj-$(CONFIG_IAVF) += iavf.o
 
-iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \
+iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \
 	     iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 8a65525a7c0d..bda2a900df8e 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -37,6 +37,7 @@
 #include "iavf_type.h"
 #include <linux/avf/virtchnl.h>
 #include "iavf_txrx.h"
+#include "iavf_fdir.h"
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 #define PFX "iavf: "
@@ -300,6 +301,8 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
 #define IAVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT(23)
 #define IAVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
+#define IAVF_FLAG_AQ_ADD_FDIR_FILTER		BIT(25)
+#define IAVF_FLAG_AQ_DEL_FDIR_FILTER		BIT(26)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -340,6 +343,8 @@ struct iavf_adapter {
 			  VIRTCHNL_VF_OFFLOAD_VLAN)
 #define ADV_LINK_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
 			      VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+#define FDIR_FLTR_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			       VIRTCHNL_VF_OFFLOAD_FDIR_PF)
 	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
 	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
 	struct virtchnl_version_info pf_version;
@@ -362,6 +367,11 @@ struct iavf_adapter {
 	/* lock to protect access to the cloud filter list */
 	spinlock_t cloud_filter_list_lock;
 	u16 num_cloud_filters;
+
+#define IAVF_MAX_FDIR_FILTERS 128	/* max allowed Flow Director filters */
+	u16 fdir_active_fltr;
+	struct list_head fdir_list_head;
+	spinlock_t fdir_fltr_lock;	/* protect the Flow Director filter list */
 };
 
 
@@ -432,6 +442,8 @@ void iavf_enable_channels(struct iavf_adapter *adapter);
 void iavf_disable_channels(struct iavf_adapter *adapter);
 void iavf_add_cloud_filter(struct iavf_adapter *adapter);
 void iavf_del_cloud_filter(struct iavf_adapter *adapter);
+void iavf_add_fdir_filter(struct iavf_adapter *adapter);
+void iavf_del_fdir_filter(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
 					const u8 *macaddr);
 #endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index c93567f4d0f7..3ebfef737f5c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -828,6 +828,623 @@ static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
 }
 
 /**
+ * iavf_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow)
+{
+	switch (flow) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		return AH_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		return ESP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		return AH_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		return ESP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		return IPV6_USER_FLOW;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return ETHER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * iavf_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_SCTP;
+	case AH_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_AH;
+	case ESP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_ESP;
+	case IPV4_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_SCTP;
+	case AH_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_AH;
+	case ESP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_ESP;
+	case IPV6_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_OTHER;
+	case ETHER_FLOW:
+		return IAVF_FDIR_FLOW_NON_IP_L2;
+	default:
+		return IAVF_FDIR_FLOW_NONE;
+	}
+}
+
+/**
+ * iavf_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * If the mask is fully set return true. If it is not valid for field return
+ * false.
+ */
+static bool iavf_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
+/**
+ * iavf_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter for userdef data storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			     struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_flex_word *flex;
+	int i, cnt = 0;
+
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	for (i = 0; i < 2; i++) {
+#define IAVF_USERDEF_FLEX_WORD_M	GENMASK(15, 0)
+#define IAVF_USERDEF_FLEX_OFFS_S	16
+#define IAVF_USERDEF_FLEX_OFFS_M	GENMASK(31, IAVF_USERDEF_FLEX_OFFS_S)
+#define IAVF_USERDEF_FLEX_FLTR_M	GENMASK(31, 0)
+		u32 value = be32_to_cpu(fsp->h_ext.data[i]);
+		u32 mask = be32_to_cpu(fsp->m_ext.data[i]);
+
+		if (!value || !mask)
+			continue;
+
+		if (!iavf_is_mask_valid(mask, IAVF_USERDEF_FLEX_FLTR_M))
+			return -EINVAL;
+
+		/* 504 is the maximum value for offsets, and offset is measured
+		 * from the start of the MAC address.
+		 */
+#define IAVF_USERDEF_FLEX_MAX_OFFS_VAL 504
+		flex = &fltr->flex_words[cnt++];
+		flex->word = value & IAVF_USERDEF_FLEX_WORD_M;
+		flex->offset = (value & IAVF_USERDEF_FLEX_OFFS_M) >>
+			     IAVF_USERDEF_FLEX_OFFS_S;
+		if (flex->offset > IAVF_USERDEF_FLEX_MAX_OFFS_VAL)
+			return -EINVAL;
+	}
+
+	fltr->flex_cnt = cnt;
+
+	return 0;
+}
+
+/**
+ * iavf_fill_rx_flow_ext_data - fill the additional data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter to get additional data
+ */
+static void
+iavf_fill_rx_flow_ext_data(struct ethtool_rx_flow_spec *fsp,
+			   struct iavf_fdir_fltr *fltr)
+{
+	if (!fltr->ext_mask.usr_def[0] && !fltr->ext_mask.usr_def[1])
+		return;
+
+	fsp->flow_type |= FLOW_EXT;
+
+	memcpy(fsp->h_ext.data, fltr->ext_data.usr_def, sizeof(fsp->h_ext.data));
+	memcpy(fsp->m_ext.data, fltr->ext_mask.usr_def, sizeof(fsp->m_ext.data));
+}
+
+/**
+ * iavf_get_ethtool_fdir_entry - fill ethtool structure with Flow Director filter data
+ * @adapter: the VF adapter structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
+			    struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *rule = NULL;
+	int ret = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	rule = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (!rule) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = iavf_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fsp->h_u.ah_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.ah_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.ah_ip4_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.ah_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.ah_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.ah_ip4_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip_data.tos;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = rule->ip_data.proto;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->ip_mask.tos;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = rule->ip_mask.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(fsp->h_u.ah_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.ah_ip6_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.ah_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.ah_ip6_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip_data.tclass;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip_data.proto;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->ip_mask.tclass;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->ip_mask.proto;
+		break;
+	case ETHER_FLOW:
+		fsp->h_u.ether_spec.h_proto = rule->eth_data.etype;
+		fsp->m_u.ether_spec.h_proto = rule->eth_mask.etype;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	iavf_fill_rx_flow_ext_data(fsp, rule);
+
+	if (rule->action == VIRTCHNL_ACTION_DROP)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * iavf_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @adapter: the VF adapter structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd,
+		       u32 *rule_locs)
+{
+	struct iavf_fdir_fltr *fltr;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	cmd->data = IAVF_MAX_FDIR_FILTERS;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	list_for_each_entry(fltr, &adapter->fdir_list_head, list) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = fltr->loc;
+		cnt++;
+	}
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+
+	return val;
+}
+
+/**
+ * iavf_add_fdir_fltr_info - Set the input set for Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: filter structure
+ */
+static int
+iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spec *fsp,
+			struct iavf_fdir_fltr *fltr)
+{
+	u32 flow_type, q_index = 0;
+	enum virtchnl_action act;
+	int err;
+
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		act = VIRTCHNL_ACTION_DROP;
+	} else {
+		q_index = fsp->ring_cookie;
+		if (q_index >= adapter->num_active_queues)
+			return -EINVAL;
+
+		act = VIRTCHNL_ACTION_QUEUE;
+	}
+
+	fltr->action = act;
+	fltr->loc = fsp->location;
+	fltr->q_index = q_index;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(fltr->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(fltr->ext_data.usr_def));
+		memcpy(fltr->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(fltr->ext_mask.usr_def));
+	}
+
+	flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+	fltr->flow_type = iavf_ethtool_flow_to_fltr(flow_type);
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		fltr->ip_data.tos = fsp->h_u.tcp_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.ah_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.ah_ip4_spec.ip4dst;
+		fltr->ip_data.spi = fsp->h_u.ah_ip4_spec.spi;
+		fltr->ip_data.tos = fsp->h_u.ah_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.ah_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
+		fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
+		fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+		break;
+	case IPV4_USER_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_data.tos = fsp->h_u.usr_ip4_spec.tos;
+		fltr->ip_data.proto = fsp->h_u.usr_ip4_spec.proto;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		fltr->ip_data.tclass = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.spi = fsp->h_u.ah_ip6_spec.spi;
+		fltr->ip_data.tclass = fsp->h_u.ah_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
+		fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_data.tclass = fsp->h_u.usr_ip6_spec.tclass;
+		fltr->ip_data.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+		break;
+	case ETHER_FLOW:
+		fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
+		fltr->eth_mask.etype = fsp->m_u.ether_spec.h_proto;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	if (iavf_fdir_is_dup_fltr(adapter, fltr))
+		return -EEXIST;
+
+	err = iavf_parse_rx_flow_user_data(fsp, fltr);
+	if (err)
+		return err;
+
+	return iavf_fill_fdir_add_msg(adapter, fltr);
+}
+
+/**
+ * iavf_add_fdir_ethtool - add Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to add Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct iavf_fdir_fltr *fltr;
+	int count = 50;
+	int err;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
+			IAVF_MAX_FDIR_FILTERS);
+		return -ENOSPC;
+	}
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
+		dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		return -EEXIST;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+	if (!fltr)
+		return -ENOMEM;
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		if (--count == 0) {
+			kfree(fltr);
+			return -EINVAL;
+		}
+		udelay(1);
+	}
+
+	err = iavf_add_fdir_fltr_info(adapter, fsp, fltr);
+	if (err)
+		goto ret;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	iavf_fdir_list_add_fltr(adapter, fltr);
+	adapter->fdir_active_fltr++;
+	fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+
+ret:
+	if (err && fltr)
+		kfree(fltr);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	return err;
+}
+
+/**
+ * iavf_del_fdir_ethtool - delete Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *fltr = NULL;
+	int err = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	fltr = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (fltr) {
+		if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) {
+			fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		} else {
+			err = -EBUSY;
+		}
+	} else if (adapter->fdir_active_fltr) {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST)
+		mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+
+	return err;
+}
+
+/**
+ * iavf_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = iavf_add_fdir_ethtool(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = iavf_del_fdir_ethtool(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
  * iavf_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
  * @cmd: ethtool rxnfc command
@@ -846,6 +1463,19 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		cmd->data = adapter->num_active_queues;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		if (!FDIR_FLTR_SUPPORT(adapter))
+			break;
+		cmd->rule_cnt = adapter->fdir_active_fltr;
+		cmd->data = IAVF_MAX_FDIR_FILTERS;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = iavf_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = iavf_get_fdir_fltr_ids(adapter, cmd, (u32 *)rule_locs);
+		break;
 	case ETHTOOL_GRXFH:
 		netdev_info(netdev,
 			    "RSS hash info is not available to vf, use pf.\n");
@@ -1025,6 +1655,7 @@ static const struct ethtool_ops iavf_ethtool_ops = {
 	.set_coalesce		= iavf_set_coalesce,
 	.get_per_queue_coalesce = iavf_get_per_queue_coalesce,
 	.set_per_queue_coalesce = iavf_set_per_queue_coalesce,
+	.set_rxnfc		= iavf_set_rxnfc,
 	.get_rxnfc		= iavf_get_rxnfc,
 	.get_rxfh_indir_size	= iavf_get_rxfh_indir_size,
 	.get_rxfh		= iavf_get_rxfh,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
new file mode 100644
index 000000000000..3e687189d737
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -0,0 +1,773 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+/* flow director ethtool support for iavf */
+
+#include "iavf.h"
+
+#define GTPU_PORT	2152
+#define NAT_T_ESP_PORT	4500
+#define PFCP_PORT	8805
+
+static const struct in6_addr ipv6_addr_full_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
+/**
+ * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
+ * @fltr: Flow Director filter data structure
+ */
+static u16 iavf_pkt_udp_no_pay_len(struct iavf_fdir_fltr *fltr)
+{
+	return sizeof(struct ethhdr) +
+	       (fltr->ip_ver == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+	       sizeof(struct udphdr);
+}
+
+/**
+ * iavf_fill_fdir_gtpu_hdr - fill the GTP-U protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the GTP-U protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *ghdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct virtchnl_proto_hdr *ehdr = NULL; /* Extension Header if it exists */
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(ghdr, GTPU_IP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_GTPU_HDR_TEID_OFFS0	4
+#define IAVF_GTPU_HDR_TEID_OFFS1	6
+#define IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS	10
+#define IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS		13
+#define IAVF_GTPU_PSC_EXTHDR_TYPE	0x85 /* PDU Session Container Extension Header */
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_GTPU_HDR_TEID_OFFS0:
+		case IAVF_GTPU_HDR_TEID_OFFS1: {
+			__be16 *pay_word = (__be16 *)ghdr->buffer;
+
+			pay_word[hdr_offs >> 1] = htons(fltr->flex_words[i].word);
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ghdr, GTPU_IP, TEID);
+			}
+			break;
+		case IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS:
+			if ((fltr->flex_words[i].word & 0xff) != IAVF_GTPU_PSC_EXTHDR_TYPE)
+				return -EOPNOTSUPP;
+			if (!ehdr)
+				ehdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+			VIRTCHNL_SET_PROTO_HDR_TYPE(ehdr, GTPU_EH);
+			break;
+		case IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS:
+			if (!ehdr)
+				return -EINVAL;
+			ehdr->buffer[1] = fltr->flex_words[i].word & 0x3F;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ehdr, GTPU_EH, QFI);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_pfcp_hdr - fill the PFCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the PFCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_pfcp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, PFCP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS	0
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS:
+			hdr->buffer[0] = (fltr->flex_words[i].word >> 8) & 0xff;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, PFCP, S_FIELD);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_nat_t_esp_hdr - fill the NAT-T-ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the NAT-T-ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_nat_t_esp_hdr(struct iavf_fdir_fltr *fltr,
+			     struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	u32 spi = 0;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_NAT_T_ESP_SPI_OFFS0	0
+#define IAVF_NAT_T_ESP_SPI_OFFS1	2
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_NAT_T_ESP_SPI_OFFS0:
+			spi |= fltr->flex_words[i].word << 16;
+			break;
+		case IAVF_NAT_T_ESP_SPI_OFFS1:
+			spi |= fltr->flex_words[i].word;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (!spi)
+		return -EOPNOTSUPP; /* Not support IKE Header Format with SPI 0 */
+
+	*(__be32 *)hdr->buffer = htonl(spi);
+	VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_flex_pay_hdr - fill the UDP payload header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP payload defined protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_flex_pay_hdr(struct iavf_fdir_fltr *fltr,
+				struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	int err;
+
+	switch (ntohs(fltr->ip_data.dst_port)) {
+	case GTPU_PORT:
+		err = iavf_fill_fdir_gtpu_hdr(fltr, proto_hdrs);
+		break;
+	case NAT_T_ESP_PORT:
+		err = iavf_fill_fdir_nat_t_esp_hdr(fltr, proto_hdrs);
+		break;
+	case PFCP_PORT:
+		err = iavf_fill_fdir_pfcp_hdr(fltr, proto_hdrs);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct iphdr *iph = (struct iphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4);
+
+	if (fltr->ip_mask.tos == U8_MAX) {
+		iph->tos = fltr->ip_data.tos;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DSCP);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->protocol = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, PROT);
+	}
+
+	if (fltr->ip_mask.v4_addrs.src_ip == htonl(U32_MAX)) {
+		iph->saddr = fltr->ip_data.v4_addrs.src_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC);
+	}
+
+	if (fltr->ip_mask.v4_addrs.dst_ip == htonl(U32_MAX)) {
+		iph->daddr = fltr->ip_data.v4_addrs.dst_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+	}
+
+	fltr->ip_ver = 4;
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_ip6_hdr - fill the IPv6 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv6 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ipv6hdr *iph = (struct ipv6hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6);
+
+	if (fltr->ip_mask.tclass == U8_MAX) {
+		iph->priority = (fltr->ip_data.tclass >> 4) & 0xF;
+		iph->flow_lbl[0] = (fltr->ip_data.tclass << 4) & 0xF0;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, TC);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->nexthdr = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, PROT);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->saddr, &fltr->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->daddr, &fltr->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+	}
+
+	fltr->ip_ver = 6;
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_tcp_hdr - fill the TCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the TCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_tcp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct tcphdr *tcph = (struct tcphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		tcph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		tcph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_hdr - fill the UDP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct udphdr *udph = (struct udphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		udph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		udph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
+	}
+
+	if (!fltr->flex_cnt)
+		return 0;
+
+	return iavf_fill_fdir_udp_flex_pay_hdr(fltr, proto_hdrs);
+}
+
+/**
+ * iavf_fill_fdir_sctp_hdr - fill the SCTP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the SCTP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_sctp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct sctphdr *sctph = (struct sctphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		sctph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		sctph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_ah_hdr - fill the AH protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the AH protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ah_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, AH);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		ah->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, AH, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_esp_hdr - fill the ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_esp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		esph->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_l4_hdr - fill the L4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the L4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_l4_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr;
+	__be32 *l4_4_data;
+
+	if (!fltr->ip_mask.proto) /* IPv4/IPv6 header only */
+		return 0;
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	l4_4_data = (__be32 *)hdr->buffer;
+
+	/* L2TPv3 over IP with 'Session ID' */
+	if (fltr->ip_data.proto == 115 && fltr->ip_mask.l4_header == htonl(U32_MAX)) {
+		VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, L2TPV3);
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, L2TPV3, SESS_ID);
+
+		*l4_4_data = fltr->ip_data.l4_header;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_eth_hdr - fill the Ethernet protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the Ethernet protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_eth_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ethhdr *ehdr = (struct ethhdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ETH);
+
+	if (fltr->eth_mask.etype == htons(U16_MAX)) {
+		if (fltr->eth_data.etype == htons(ETH_P_IP) ||
+		    fltr->eth_data.etype == htons(ETH_P_IPV6))
+			return -EOPNOTSUPP;
+
+		ehdr->h_proto = fltr->eth_data.etype;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ETH, ETHERTYPE);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_add_msg - fill the Flow Director filter into virtchnl message
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if the add Flow Director virtchnl message is filled successfully
+ */
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct virtchnl_fdir_add *vc_msg = &fltr->vc_add_msg;
+	struct virtchnl_proto_hdrs *proto_hdrs;
+	int err;
+
+	proto_hdrs = &vc_msg->rule_cfg.proto_hdrs;
+
+	err = iavf_fill_fdir_eth_hdr(fltr, proto_hdrs); /* L2 always exists */
+	if (err)
+		return err;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	if (err)
+		return err;
+
+	vc_msg->vsi_id = adapter->vsi.id;
+	vc_msg->rule_cfg.action_set.count = 1;
+	vc_msg->rule_cfg.action_set.actions[0].type = fltr->action;
+	vc_msg->rule_cfg.action_set.actions[0].act_conf.queue.index = fltr->q_index;
+
+	return 0;
+}
+
+/**
+ * iavf_fdir_flow_proto_name - get the flow protocol name
+ * @flow_type: Flow Director filter flow type
+ **/
+static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type)
+{
+	switch (flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		return "TCP";
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		return "UDP";
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		return "SCTP";
+	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		return "AH";
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		return "ESP";
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		return "Other";
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return "Ethernet";
+	default:
+		return NULL;
+	}
+}
+
+/**
+ * iavf_print_fdir_fltr
+ * @adapter: adapter structure
+ * @fltr: Flow Director filter to print
+ *
+ * Print the Flow Director filter
+ **/
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	const char *proto = iavf_fdir_flow_proto_name(fltr->flow_type);
+
+	if (!proto)
+		return;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u eth_type: 0x%x\n",
+			 fltr->loc,
+			 ntohs(fltr->eth_data.etype));
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_fdir_is_dup_fltr - test if filter is already in list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *tmp;
+	bool ret = false;
+
+	list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
+		if (tmp->flow_type != fltr->flow_type)
+			continue;
+
+		if (!memcmp(&tmp->eth_data, &fltr->eth_data,
+			    sizeof(fltr->eth_data)) &&
+		    !memcmp(&tmp->ip_data, &fltr->ip_data,
+			    sizeof(fltr->ip_data)) &&
+		    !memcmp(&tmp->ext_data, &fltr->ext_data,
+			    sizeof(fltr->ext_data))) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_find_fdir_fltr_by_loc - find filter with location
+ * @adapter: pointer to the VF adapter structure
+ * @loc: location to find.
+ *
+ * Returns pointer to Flow Director filter if found or null
+ */
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc)
+{
+	struct iavf_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list)
+		if (rule->loc == loc)
+			return rule;
+
+	return NULL;
+}
+
+/**
+ * iavf_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: filter node to add to structure
+ */
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list) {
+		if (rule->loc >= fltr->loc)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->list, &parent->list);
+	else
+		list_add(&fltr->list, &adapter->fdir_list_head);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
new file mode 100644
index 000000000000..2439c970b657
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _IAVF_FDIR_H_
+#define _IAVF_FDIR_H_
+
+struct iavf_adapter;
+
+/* State of Flow Director filter */
+enum iavf_fdir_fltr_state_t {
+	IAVF_FDIR_FLTR_ADD_REQUEST,	/* User requests to add filter */
+	IAVF_FDIR_FLTR_ADD_PENDING,	/* Filter pending add by the PF */
+	IAVF_FDIR_FLTR_DEL_REQUEST,	/* User requests to delete filter */
+	IAVF_FDIR_FLTR_DEL_PENDING,	/* Filter pending delete by the PF */
+	IAVF_FDIR_FLTR_ACTIVE,		/* Filter is active */
+};
+
+enum iavf_fdir_flow_type {
+	/* NONE - used for undef/error */
+	IAVF_FDIR_FLOW_NONE = 0,
+	IAVF_FDIR_FLOW_IPV4_TCP,
+	IAVF_FDIR_FLOW_IPV4_UDP,
+	IAVF_FDIR_FLOW_IPV4_SCTP,
+	IAVF_FDIR_FLOW_IPV4_AH,
+	IAVF_FDIR_FLOW_IPV4_ESP,
+	IAVF_FDIR_FLOW_IPV4_OTHER,
+	IAVF_FDIR_FLOW_IPV6_TCP,
+	IAVF_FDIR_FLOW_IPV6_UDP,
+	IAVF_FDIR_FLOW_IPV6_SCTP,
+	IAVF_FDIR_FLOW_IPV6_AH,
+	IAVF_FDIR_FLOW_IPV6_ESP,
+	IAVF_FDIR_FLOW_IPV6_OTHER,
+	IAVF_FDIR_FLOW_NON_IP_L2,
+	/* MAX - this must be last and add anything new just above it */
+	IAVF_FDIR_FLOW_PTYPE_MAX,
+};
+
+struct iavf_flex_word {
+	u16 offset;
+	u16 word;
+};
+
+struct iavf_ipv4_addrs {
+	__be32 src_ip;
+	__be32 dst_ip;
+};
+
+struct iavf_ipv6_addrs {
+	struct in6_addr src_ip;
+	struct in6_addr dst_ip;
+};
+
+struct iavf_fdir_eth {
+	__be16 etype;
+};
+
+struct iavf_fdir_ip {
+	union {
+		struct iavf_ipv4_addrs v4_addrs;
+		struct iavf_ipv6_addrs v6_addrs;
+	};
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 l4_header;	/* first 4 bytes of the layer 4 header */
+	__be32 spi;		/* security parameter index for AH/ESP */
+	union {
+		u8 tos;
+		u8 tclass;
+	};
+	u8 proto;
+};
+
+struct iavf_fdir_extra {
+	u32 usr_def[2];
+};
+
+/* bookkeeping of Flow Director filters */
+struct iavf_fdir_fltr {
+	enum iavf_fdir_fltr_state_t state;
+	struct list_head list;
+
+	enum iavf_fdir_flow_type flow_type;
+
+	struct iavf_fdir_eth eth_data;
+	struct iavf_fdir_eth eth_mask;
+
+	struct iavf_fdir_ip ip_data;
+	struct iavf_fdir_ip ip_mask;
+
+	struct iavf_fdir_extra ext_data;
+	struct iavf_fdir_extra ext_mask;
+
+	enum virtchnl_action action;
+
+	/* flex byte filter data */
+	u8 ip_ver; /* used to adjust the flex offset, 4 : IPv4, 6 : IPv6 */
+	u8 flex_cnt;
+	struct iavf_flex_word flex_words[2];
+
+	u32 flow_id;
+
+	u32 loc;	/* Rule location inside the flow table */
+	u32 q_index;
+
+	struct virtchnl_fdir_add vc_add_msg;
+};
+
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc);
+#endif /* _IAVF_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index dc5b3c06d1e0..a3268c894d85 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -959,8 +959,9 @@ void iavf_down(struct iavf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct iavf_vlan_filter *vlf;
-	struct iavf_mac_filter *f;
 	struct iavf_cloud_filter *cf;
+	struct iavf_fdir_fltr *fdir;
+	struct iavf_mac_filter *f;
 
 	if (adapter->state <= __IAVF_DOWN_PENDING)
 		return;
@@ -996,6 +997,13 @@ void iavf_down(struct iavf_adapter *adapter)
 	}
 	spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
+	/* remove all Flow Director filters */
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
 	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
 	    adapter->state != __IAVF_RESETTING) {
 		/* cancel any current operation */
@@ -1007,6 +1015,7 @@ void iavf_down(struct iavf_adapter *adapter)
 		adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
 	}
 
@@ -1629,6 +1638,14 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
 		iavf_add_cloud_filter(adapter);
 		return 0;
 	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_FDIR_FILTER) {
+		iavf_add_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_FDIR_FILTER) {
+		iavf_del_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
 	return -EAGAIN;
 }
 
@@ -2529,7 +2546,7 @@ validate_bw:
 }
 
 /**
- * iavf_validate_channel_config - validate queue mapping info
+ * iavf_validate_ch_config - validate queue mapping info
  * @adapter: board private structure
  * @mqprio_qopt: queue parameters
  *
@@ -3738,10 +3755,12 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	spin_lock_init(&adapter->mac_vlan_list_lock);
 	spin_lock_init(&adapter->cloud_filter_list_lock);
+	spin_lock_init(&adapter->fdir_fltr_lock);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
 	INIT_LIST_HEAD(&adapter->cloud_filter_list);
+	INIT_LIST_HEAD(&adapter->fdir_list_head);
 
 	INIT_WORK(&adapter->reset_task, iavf_reset_task);
 	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
@@ -3845,6 +3864,7 @@ static void iavf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_fdir_fltr *fdir, *fdirtmp;
 	struct iavf_vlan_filter *vlf, *vlftmp;
 	struct iavf_mac_filter *f, *ftmp;
 	struct iavf_cloud_filter *cf, *cftmp;
@@ -3926,6 +3946,13 @@ static void iavf_remove(struct pci_dev *pdev)
 	}
 	spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, list) {
+		list_del(&fdir->list);
+		kfree(fdir);
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index ffaf2742a2e0..d6cba53a3a21 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -2098,7 +2098,7 @@ static int iavf_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 }
 
 /**
- * iavf_create_tx_ctx Build the Tx context descriptor
+ * iavf_create_tx_ctx - Build the Tx context descriptor
  * @tx_ring:  ring to create the descriptor on
  * @cd_type_cmd_tso_mss: Quad Word 1
  * @cd_tunneling: Quad Word 0 - bits 0-31
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 647e7fde11b4..3069092468b2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -140,6 +140,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
 	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 	       VIRTCHNL_VF_OFFLOAD_ADQ |
+	       VIRTCHNL_VF_OFFLOAD_FDIR_PF |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
@@ -1005,7 +1006,7 @@ iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter,
 }
 
 /**
- * iavf_enable_channel
+ * iavf_enable_channels
  * @adapter: adapter structure
  *
  * Request that the PF enable channels as specified by
@@ -1046,7 +1047,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter)
 }
 
 /**
- * iavf_disable_channel
+ * iavf_disable_channels
  * @adapter: adapter structure
  *
  * Request that the PF disable channels that are configured
@@ -1198,6 +1199,101 @@ void iavf_del_cloud_filter(struct iavf_adapter *adapter)
 }
 
 /**
+ * iavf_add_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF add Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_add_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+	struct virtchnl_fdir_add *f;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_add);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
+			process_fltr = true;
+			fdir->state = IAVF_FDIR_FLTR_ADD_PENDING;
+			memcpy(f, &fdir->vc_add_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		/* prevent iavf_add_fdir_filter() from being called when there
+		 * are no filters to add
+		 */
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+		kfree(f);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_FDIR_FILTER, (u8 *)f, len);
+	kfree(f);
+}
+
+/**
+ * iavf_del_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF delete Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_del_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+	struct virtchnl_fdir_del f;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_del);
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) {
+			process_fltr = true;
+			memset(&f, 0, len);
+			f.vsi_id = fdir->vc_add_msg.vsi_id;
+			f.flow_id = fdir->flow_id;
+			fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		return;
+	}
+
+	adapter->current_op = VIRTCHNL_OP_DEL_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_FDIR_FILTER, (u8 *)&f, len);
+}
+
+/**
  * iavf_request_reset
  * @adapter: adapter structure
  *
@@ -1357,6 +1453,50 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 			}
 			}
 			break;
+		case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry_safe(fdir, fdir_tmp,
+						 &adapter->fdir_list_head,
+						 list) {
+				if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
+					if (msglen)
+						dev_err(&adapter->pdev->dev,
+							"%s\n", msg);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry(fdir, &adapter->fdir_list_head,
+					    list) {
+				if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
 				v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -1490,6 +1630,58 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		}
 		}
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+		struct virtchnl_fdir_add *add_fltr = (struct virtchnl_fdir_add *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp,
+					 &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+				if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n",
+						 fdir->loc);
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					fdir->flow_id = add_fltr->flow_id;
+				} else {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter with status: %d\n",
+						 add_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+		struct virtchnl_fdir_del *del_fltr = (struct virtchnl_fdir_del *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+				if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n",
+						 fdir->loc);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				} else {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n",
+						 del_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 73da4f71f530..f391691e2c7e 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -26,7 +26,7 @@ ice-y := ice_main.o	\
 	 ice_fw_update.o \
 	 ice_lag.o	\
 	 ice_ethtool.o
-ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
+ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
 ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
 ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 357706444dd5..9bf346133cbd 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -73,7 +73,7 @@
 #define ICE_MIN_LAN_TXRX_MSIX	1
 #define ICE_MIN_LAN_OICR_MSIX	1
 #define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
-#define ICE_FDIR_MSIX		1
+#define ICE_FDIR_MSIX		2
 #define ICE_NO_VSI		0xffff
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
@@ -84,6 +84,8 @@
 #define ICE_MAX_LG_RSS_QS	256
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
+/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
+#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
 #define ICE_INVAL_VFID		256
 
@@ -229,6 +231,7 @@ enum ice_state {
 	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
 	__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
 	__ICE_PHY_INIT_COMPLETE,
+	__ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
 	__ICE_STATE_NBITS		/* must be last */
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3d9475e222cd..1898325e62b5 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4373,7 +4373,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
 }
 
 /**
- * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl
+ * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl
  * @hw: pointer to HW struct
  */
 bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index e42727941ef5..85c9eccfdae8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -834,7 +834,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
 }
 
 /**
- * ice_get_ieee_dcb_cfg
+ * ice_get_ieee_or_cee_dcb_cfg
  * @pi: port information structure
  * @dcbx_mode: mode of DCBX (IEEE or CEE)
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 2dcfa685b763..4f738425fb44 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -871,68 +871,47 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_vsi_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_VSI_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_vsi_stats[i].stat_string);
 
 		ice_for_each_alloc_txq(vsi, i) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
 		}
 
 		if (vsi->type != ICE_VSI_PF)
 			return;
 
-		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_pf_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_PF_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_pf_stats[i].stat_string);
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_priority_%u_xon.nic", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_priority_%u_xoff.nic", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i);
 		}
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_priority_%u_xon.nic", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_priority_%u_xoff.nic", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i);
 		}
 		break;
 	case ETH_SS_TEST:
 		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_PRIV_FLAGS:
-		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_priv_flags[i].name);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++)
+			ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 192729546bbf..440964defa4a 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1679,6 +1679,10 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 		input->flex_offset = userdata.flex_offset;
 	}
 
+	input->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
 	/* input struct is added to the HW filter list */
 	ice_fdir_update_list_entry(pf, input, fsp->location);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 59c0c6a0f8c5..59ef68f072c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -40,6 +40,204 @@ static const u8 ice_fdir_ipv4_pkt[] = {
 	0x00, 0x00
 };
 
+static const u8 ice_fdir_udp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x58, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_icmp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x44, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x73,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x73, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x32,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x32, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x33,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x33, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x11, 0x94, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x11, 0x94, 0x00, 0x00, 0x00, 0x08,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x21, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_non_ip_l2_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 static const u8 ice_fdir_tcpv6_pkt[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
@@ -239,6 +437,111 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
 	},
 	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_AH,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_AH,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NON_IP_L2,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+	},
+	{
 		ICE_FLTR_PTYPE_NONF_IPV6_TCP,
 		sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
 		sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt,
@@ -374,21 +677,31 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
 	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
 		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
 		fdir_fltr_ctx.qindex = 0;
+	} else if (input->dest_ctl ==
+		   ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = 0;
 	} else {
+		if (input->dest_ctl ==
+		    ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP)
+			fdir_fltr_ctx.toq = input->q_region;
 		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
 		fdir_fltr_ctx.qindex = input->q_index;
 	}
-	fdir_fltr_ctx.cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fdir_fltr_ctx.cnt_ena = input->cnt_ena;
 	fdir_fltr_ctx.cnt_index = input->cnt_index;
 	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
 	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
-	fdir_fltr_ctx.toq_prio = 3;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER)
+		fdir_fltr_ctx.toq_prio = 0;
+	else
+		fdir_fltr_ctx.toq_prio = 3;
 	fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
 		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
 	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
 	fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
-	fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
-	fdir_fltr_ctx.fdid_prio = 3;
+	fdir_fltr_ctx.comp_report = input->comp_report;
+	fdir_fltr_ctx.fdid_prio = input->fdid_prio;
 	fdir_fltr_ctx.desc_prof = 1;
 	fdir_fltr_ctx.desc_prof_prio = 3;
 	ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
@@ -471,6 +784,55 @@ static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
 }
 
 /**
+ * ice_pkt_insert_u6_qfi - insert a u6 value QFI into a memory buffer for GTPU
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting QFI (6 bits) for GTPU.
+ */
+static void ice_pkt_insert_u6_qfi(u8 *pkt, int offset, u8 data)
+{
+	u8 ret;
+
+	ret = (data & 0x3F) + (*(pkt + offset) & 0xC0);
+	memcpy(pkt + offset, &ret, sizeof(ret));
+}
+
+/**
+ * ice_pkt_insert_u8 - insert a u8 value into a memory buffer.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u8(u8 *pkt, int offset, u8 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u8_tc - insert a u8 value into a memory buffer for TC ipv6.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting Traffic Class (TC) for IPv6,
+ * since that TC is not aligned in number of bytes. Here we split it out
+ * into two part and fill each byte with data copy from pkt, then insert
+ * the two bytes data one by one.
+ */
+static void ice_pkt_insert_u8_tc(u8 *pkt, int offset, u8 data)
+{
+	u8 high, low;
+
+	high = (data >> 4) + (*(pkt + offset) & 0xF0);
+	memcpy(pkt + offset, &high, sizeof(high));
+
+	low = (*(pkt + offset + 1) & 0x0F) + ((data & 0x0F) << 4);
+	memcpy(pkt + offset + 1, &low, sizeof(low));
+}
+
+/**
  * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
  * @pkt: packet buffer
  * @offset: offset into buffer
@@ -493,6 +855,16 @@ static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
 }
 
 /**
+ * ice_pkt_insert_mac_addr - insert a MAC addr into a memory buffer.
+ * @pkt: packet buffer
+ * @addr: MAC address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_mac_addr(u8 *pkt, u8 *addr)
+{
+	ether_addr_copy(pkt, addr);
+}
+
+/**
  * ice_fdir_get_gen_prgm_pkt - generate a training packet
  * @hw: pointer to the hardware structure
  * @input: flow director filter data structure
@@ -520,11 +892,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		case IPPROTO_SCTP:
 			flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
 			break;
-		case IPPROTO_IP:
+		default:
 			flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
 			break;
-		default:
-			return ICE_ERR_PARAM;
 		}
 	} else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
 		switch (input->ip.v6.proto) {
@@ -537,11 +907,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		case IPPROTO_SCTP:
 			flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
 			break;
-		case IPPROTO_IP:
+		default:
 			flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
 			break;
-		default:
-			return ICE_ERR_PARAM;
 		}
 	} else {
 		flow = input->flow_type;
@@ -580,6 +948,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		if (frag)
 			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
 		break;
@@ -592,6 +963,11 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		ice_pkt_insert_mac_addr(loc + ETH_ALEN,
+					input->ext_data.src_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
@@ -602,13 +978,87 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
 				   input->ip.v4.src_ip);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
 				   input->ip.v4.dst_ip);
-		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_u8(loc, ICE_IPV4_PROTO_OFFSET,
+				  input->ip.v4.proto);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET,
+				   input->gtpu_data.teid);
+		ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET,
+				      input->gtpu_data.qfi);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV4_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV6_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV6_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV4_AH_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV6_AH_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV6_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
+				   input->ext_data.ether_type);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -619,6 +1069,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -629,6 +1082,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -639,12 +1095,20 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
 					 input->ip.v6.src_ip);
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
 					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_u8(loc, ICE_IPV6_PROTO_OFFSET,
+				  input->ip.v6.proto);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	default:
 		return ICE_ERR_PARAM;
@@ -671,7 +1135,7 @@ bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
 }
 
 /**
- * ice_fdir_find_by_idx - find filter with idx
+ * ice_fdir_find_fltr_by_idx - find filter with idx
  * @hw: pointer to hardware structure
  * @fltr_idx: index to find.
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 1c587766daab..d2d40e18ae8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -25,6 +25,25 @@
 #define ICE_IPV6_UDP_DST_PORT_OFFSET	56
 #define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
 #define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+#define ICE_MAC_ETHTYPE_OFFSET		12
+#define ICE_IPV4_TOS_OFFSET		15
+#define ICE_IPV4_TTL_OFFSET		22
+#define ICE_IPV6_TC_OFFSET		14
+#define ICE_IPV6_HLIM_OFFSET		21
+#define ICE_IPV6_PROTO_OFFSET		20
+#define ICE_IPV4_GTPU_TEID_OFFSET	46
+#define ICE_IPV4_GTPU_QFI_OFFSET	56
+#define ICE_IPV4_L2TPV3_SESS_ID_OFFSET	34
+#define ICE_IPV6_L2TPV3_SESS_ID_OFFSET	54
+#define ICE_IPV4_ESP_SPI_OFFSET		34
+#define ICE_IPV6_ESP_SPI_OFFSET		54
+#define ICE_IPV4_AH_SPI_OFFSET		38
+#define ICE_IPV6_AH_SPI_OFFSET		58
+#define ICE_IPV4_NAT_T_ESP_SPI_OFFSET	42
+#define ICE_IPV6_NAT_T_ESP_SPI_OFFSET	62
+
+#define ICE_FDIR_MAX_FLTRS		16384
+
 /* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF
  * requests that the packet not be fragmented. MF indicates that a packet has
  * been fragmented.
@@ -34,6 +53,8 @@
 enum ice_fltr_prgm_desc_dest {
 	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
 	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER,
 };
 
 enum ice_fltr_prgm_desc_fd_status {
@@ -86,6 +107,7 @@ struct ice_fdir_v4 {
 	u8 tos;
 	u8 ip_ver;
 	u8 proto;
+	u8 ttl;
 };
 
 #define ICE_IPV6_ADDR_LEN_AS_U32		4
@@ -99,10 +121,35 @@ struct ice_fdir_v6 {
 	__be32 sec_parm_idx; /* security parameter index */
 	u8 tc;
 	u8 proto;
+	u8 hlim;
+};
+
+struct ice_fdir_udp_gtp {
+	u8 flags;
+	u8 msg_type;
+	__be16 rsrvd_len;
+	__be32 teid;
+	__be16 rsrvd_seq_nbr;
+	u8 rsrvd_n_pdu_nbr;
+	u8 rsrvd_next_ext_type;
+	u8 rsvrd_ext_len;
+	u8	pdu_type:4,
+		spare:4;
+	u8	ppp:1,
+		rqi:1,
+		qfi:6;
+	u32 rsvrd;
+	u8 next_ext;
+};
+
+struct ice_fdir_l2tpv3 {
+	__be32 session_id;
 };
 
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
+	u8 src_mac[ETH_ALEN];	/* src MAC address */
+	__be16 ether_type;	/* for NON_IP_L2 */
 	u32 usr_def[2];		/* user data */
 	__be16 vlan_type;	/* VLAN ethertype */
 	__be16 vlan_tag;	/* VLAN tag info */
@@ -117,11 +164,19 @@ struct ice_fdir_fltr {
 		struct ice_fdir_v6 v6;
 	} ip, mask;
 
+	struct ice_fdir_udp_gtp gtpu_data;
+	struct ice_fdir_udp_gtp gtpu_mask;
+
+	struct ice_fdir_l2tpv3 l2tpv3_data;
+	struct ice_fdir_l2tpv3 l2tpv3_mask;
+
 	struct ice_fdir_extra ext_data;
 	struct ice_fdir_extra ext_mask;
 
 	/* flex byte filter data */
 	__be16 flex_word;
+	/* queue region size (=2^q_region) */
+	u8 q_region;
 	u16 flex_offset;
 	u16 flex_fltr;
 
@@ -129,9 +184,12 @@ struct ice_fdir_fltr {
 	u16 q_index;
 	u16 dest_vsi;
 	u8 dest_ctl;
+	u8 cnt_ena;
 	u8 fltr_status;
 	u16 cnt_index;
 	u32 fltr_id;
+	u8 fdid_prio;
+	u8 comp_report;
 };
 
 /* Dummy packet filter definition structure */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 5e1fd30c0a0f..afe77f7a3199 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2361,18 +2361,82 @@ ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
 }
 
 /**
- * ice_find_prof_id - find profile ID for a given field vector
+ * ice_prof_has_mask_idx - determine if profile index masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @idx: profile index to check
+ * @mask: mask to match
+ */
+static bool
+ice_prof_has_mask_idx(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 idx,
+		      u16 mask)
+{
+	bool expect_no_mask = false;
+	bool found = false;
+	bool match = false;
+	u16 i;
+
+	/* If mask is 0x0000 or 0xffff, then there is no masking */
+	if (mask == 0 || mask == 0xffff)
+		expect_no_mask = true;
+
+	/* Scan the enabled masks on this profile, for the specified idx */
+	for (i = hw->blk[blk].masks.first; i < hw->blk[blk].masks.first +
+	     hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].es.mask_ena[prof] & BIT(i))
+			if (hw->blk[blk].masks.masks[i].in_use &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found = true;
+				if (hw->blk[blk].masks.masks[i].mask == mask)
+					match = true;
+				break;
+			}
+
+	if (expect_no_mask) {
+		if (found)
+			return false;
+	} else {
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_prof_has_mask - determine if profile masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @masks: masks to match
+ */
+static bool
+ice_prof_has_mask(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 *masks)
+{
+	u16 i;
+
+	/* es->mask_ena[prof] will have the mask */
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (!ice_prof_has_mask_idx(hw, blk, prof, i, masks[i]))
+			return false;
+
+	return true;
+}
+
+/**
+ * ice_find_prof_id_with_mask - find profile ID for a given field vector
  * @hw: pointer to the hardware structure
  * @blk: HW block
  * @fv: field vector to search for
+ * @masks: masks for FV
  * @prof_id: receives the profile ID
  */
 static enum ice_status
-ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
-		 struct ice_fv_word *fv, u8 *prof_id)
+ice_find_prof_id_with_mask(struct ice_hw *hw, enum ice_block blk,
+			   struct ice_fv_word *fv, u16 *masks, u8 *prof_id)
 {
 	struct ice_es *es = &hw->blk[blk].es;
-	u16 off;
 	u8 i;
 
 	/* For FD, we don't want to re-use a existed profile with the same
@@ -2382,11 +2446,15 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
 		return ICE_ERR_DOES_NOT_EXIST;
 
 	for (i = 0; i < (u8)es->count; i++) {
-		off = i * es->fvw;
+		u16 off = i * es->fvw;
 
 		if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
 			continue;
 
+		/* check if masks settings are the same for this profile */
+		if (masks && !ice_prof_has_mask(hw, blk, i, masks))
+			continue;
+
 		*prof_id = i;
 		return 0;
 	}
@@ -2438,20 +2506,22 @@ static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
  * ice_alloc_tcam_ent - allocate hardware TCAM entry
  * @hw: pointer to the HW struct
  * @blk: the block to allocate the TCAM for
+ * @btm: true to allocate from bottom of table, false to allocate from top
  * @tcam_idx: pointer to variable to receive the TCAM entry
  *
  * This function allocates a new entry in a Profile ID TCAM for a specific
  * block.
  */
 static enum ice_status
-ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx)
+ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, bool btm,
+		   u16 *tcam_idx)
 {
 	u16 res_type;
 
 	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
 		return ICE_ERR_PARAM;
 
-	return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx);
+	return ice_alloc_hw_res(hw, res_type, 1, btm, tcam_idx);
 }
 
 /**
@@ -2537,6 +2607,330 @@ ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
 }
 
 /**
+ * ice_write_prof_mask_reg - write profile mask register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: mask index
+ * @idx: index of the FV which will use the mask
+ * @mask: the 16-bit mask
+ */
+static void
+ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx,
+			u16 idx, u16 mask)
+{
+	u32 offset;
+	u32 val;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK(mask_idx);
+		val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M;
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK(mask_idx);
+		val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, val);
+	ice_debug(hw, ICE_DBG_PKG, "write mask, blk %d (%d): %x = %x\n",
+		  blk, idx, offset, val);
+}
+
+/**
+ * ice_write_prof_mask_enable_res - write profile mask enable register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @enable_mask: enable mask
+ */
+static void
+ice_write_prof_mask_enable_res(struct ice_hw *hw, enum ice_block blk,
+			       u16 prof_id, u32 enable_mask)
+{
+	u32 offset;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK_SEL(prof_id);
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK_SEL(prof_id);
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, enable_mask);
+	ice_debug(hw, ICE_DBG_PKG, "write mask enable, blk %d (%d): %x = %x\n",
+		  blk, prof_id, offset, enable_mask);
+}
+
+/**
+ * ice_init_prof_masks - initial prof masks
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ */
+static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 per_pf;
+	u16 i;
+
+	mutex_init(&hw->blk[blk].masks.lock);
+
+	per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs;
+
+	hw->blk[blk].masks.count = per_pf;
+	hw->blk[blk].masks.first = hw->pf_id * per_pf;
+
+	memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks));
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+}
+
+/**
+ * ice_init_all_prof_masks - initialize all prof masks
+ * @hw: pointer to the HW struct
+ */
+static void ice_init_all_prof_masks(struct ice_hw *hw)
+{
+	ice_init_prof_masks(hw, ICE_BLK_RSS);
+	ice_init_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_alloc_prof_mask - allocate profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: index of FV which will use the mask
+ * @mask: the 16-bit mask
+ * @mask_idx: variable to receive the mask index
+ */
+static enum ice_status
+ice_alloc_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 idx, u16 mask,
+		    u16 *mask_idx)
+{
+	bool found_unused = false, found_copy = false;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	u16 unused_idx = 0, copy_idx = 0;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].masks.masks[i].in_use) {
+			/* if mask is in use and it exactly duplicates the
+			 * desired mask and index, then in can be reused
+			 */
+			if (hw->blk[blk].masks.masks[i].mask == mask &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found_copy = true;
+				copy_idx = i;
+				break;
+			}
+		} else {
+			/* save off unused index, but keep searching in case
+			 * there is an exact match later on
+			 */
+			if (!found_unused) {
+				found_unused = true;
+				unused_idx = i;
+			}
+		}
+
+	if (found_copy)
+		i = copy_idx;
+	else if (found_unused)
+		i = unused_idx;
+	else
+		goto err_ice_alloc_prof_mask;
+
+	/* update mask for a new entry */
+	if (found_unused) {
+		hw->blk[blk].masks.masks[i].in_use = true;
+		hw->blk[blk].masks.masks[i].mask = mask;
+		hw->blk[blk].masks.masks[i].idx = idx;
+		hw->blk[blk].masks.masks[i].ref = 0;
+		ice_write_prof_mask_reg(hw, blk, i, idx, mask);
+	}
+
+	hw->blk[blk].masks.masks[i].ref++;
+	*mask_idx = i;
+	status = 0;
+
+err_ice_alloc_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return status;
+}
+
+/**
+ * ice_free_prof_mask - free profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: index of mask
+ */
+static enum ice_status
+ice_free_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 mask_idx)
+{
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	if (!(mask_idx >= hw->blk[blk].masks.first &&
+	      mask_idx < hw->blk[blk].masks.first + hw->blk[blk].masks.count))
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	if (!hw->blk[blk].masks.masks[mask_idx].in_use)
+		goto exit_ice_free_prof_mask;
+
+	if (hw->blk[blk].masks.masks[mask_idx].ref > 1) {
+		hw->blk[blk].masks.masks[mask_idx].ref--;
+		goto exit_ice_free_prof_mask;
+	}
+
+	/* remove mask */
+	hw->blk[blk].masks.masks[mask_idx].in_use = false;
+	hw->blk[blk].masks.masks[mask_idx].mask = 0;
+	hw->blk[blk].masks.masks[mask_idx].idx = 0;
+
+	/* update mask as unused entry */
+	ice_debug(hw, ICE_DBG_PKG, "Free mask, blk %d, mask %d\n", blk,
+		  mask_idx);
+	ice_write_prof_mask_reg(hw, blk, mask_idx, 0, 0);
+
+exit_ice_free_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return 0;
+}
+
+/**
+ * ice_free_prof_masks - free all profile masks for a profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ */
+static enum ice_status
+ice_free_prof_masks(struct ice_hw *hw, enum ice_block blk, u16 prof_id)
+{
+	u32 mask_bm;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	mask_bm = hw->blk[blk].es.mask_ena[prof_id];
+	for (i = 0; i < BITS_PER_BYTE * sizeof(mask_bm); i++)
+		if (mask_bm & BIT(i))
+			ice_free_prof_mask(hw, blk, i);
+
+	return 0;
+}
+
+/**
+ * ice_shutdown_prof_masks - releases lock for masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) {
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+
+		hw->blk[blk].masks.masks[i].in_use = false;
+		hw->blk[blk].masks.masks[i].idx = 0;
+		hw->blk[blk].masks.masks[i].mask = 0;
+	}
+
+	mutex_unlock(&hw->blk[blk].masks.lock);
+	mutex_destroy(&hw->blk[blk].masks.lock);
+}
+
+/**
+ * ice_shutdown_all_prof_masks - releases all locks for masking
+ * @hw: pointer to the HW struct
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_all_prof_masks(struct ice_hw *hw)
+{
+	ice_shutdown_prof_masks(hw, ICE_BLK_RSS);
+	ice_shutdown_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_update_prof_masking - set registers according to masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @masks: masks
+ */
+static enum ice_status
+ice_update_prof_masking(struct ice_hw *hw, enum ice_block blk, u16 prof_id,
+			u16 *masks)
+{
+	bool err = false;
+	u32 ena_mask = 0;
+	u16 idx;
+	u16 i;
+
+	/* Only support FD and RSS masking, otherwise nothing to be done */
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return 0;
+
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (masks[i] && masks[i] != 0xFFFF) {
+			if (!ice_alloc_prof_mask(hw, blk, i, masks[i], &idx)) {
+				ena_mask |= BIT(idx);
+			} else {
+				/* not enough bitmaps */
+				err = true;
+				break;
+			}
+		}
+
+	if (err) {
+		/* free any bitmaps we have allocated */
+		for (i = 0; i < BITS_PER_BYTE * sizeof(ena_mask); i++)
+			if (ena_mask & BIT(i))
+				ice_free_prof_mask(hw, blk, i);
+
+		return ICE_ERR_OUT_OF_RANGE;
+	}
+
+	/* enable the masks for this profile */
+	ice_write_prof_mask_enable_res(hw, blk, prof_id, ena_mask);
+
+	/* store enabled masks with profile so that they can be freed later */
+	hw->blk[blk].es.mask_ena[prof_id] = ena_mask;
+
+	return 0;
+}
+
+/**
  * ice_write_es - write an extraction sequence to hardware
  * @hw: pointer to the HW struct
  * @blk: the block in which to write the extraction sequence
@@ -2575,6 +2969,7 @@ ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
 	if (hw->blk[blk].es.ref_count[prof_id] > 0) {
 		if (!--hw->blk[blk].es.ref_count[prof_id]) {
 			ice_write_es(hw, blk, prof_id, NULL);
+			ice_free_prof_masks(hw, blk, prof_id);
 			return ice_free_prof_id(hw, blk, prof_id);
 		}
 	}
@@ -2937,6 +3332,7 @@ void ice_free_hw_tbls(struct ice_hw *hw)
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.mask_ena);
 	}
 
 	list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) {
@@ -2944,6 +3340,7 @@ void ice_free_hw_tbls(struct ice_hw *hw)
 		devm_kfree(ice_hw_to_dev(hw), r);
 	}
 	mutex_destroy(&hw->rss_locks);
+	ice_shutdown_all_prof_masks(hw);
 	memset(hw->blk, 0, sizeof(hw->blk));
 }
 
@@ -2997,6 +3394,7 @@ void ice_clear_hw_tbls(struct ice_hw *hw)
 		memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw);
 		memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
 		memset(es->written, 0, es->count * sizeof(*es->written));
+		memset(es->mask_ena, 0, es->count * sizeof(*es->mask_ena));
 	}
 }
 
@@ -3010,6 +3408,7 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
 
 	mutex_init(&hw->rss_locks);
 	INIT_LIST_HEAD(&hw->rss_list_head);
+	ice_init_all_prof_masks(hw);
 	for (i = 0; i < ICE_BLK_COUNT; i++) {
 		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
 		struct ice_prof_tcam *prof = &hw->blk[i].prof;
@@ -3112,6 +3511,11 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
 					   sizeof(*es->written), GFP_KERNEL);
 		if (!es->written)
 			goto err;
+
+		es->mask_ena = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					    sizeof(*es->mask_ena), GFP_KERNEL);
+		if (!es->mask_ena)
+			goto err;
 	}
 	return 0;
 
@@ -3711,22 +4115,79 @@ ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
 	return 0;
 }
 
+/* The entries here needs to match the order of enum ice_ptype_attrib */
+static const struct ice_ptype_attrib_info ice_ptype_attributes[] = {
+	{ ICE_GTP_PDU_EH,	ICE_GTP_PDU_FLAG_MASK },
+	{ ICE_GTP_SESSION,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_DOWNLINK,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_UPLINK,	ICE_GTP_FLAGS_MASK },
+};
+
+/**
+ * ice_get_ptype_attrib_info - get PTYPE attribute information
+ * @type: attribute type
+ * @info: pointer to variable to the attribute information
+ */
+static void
+ice_get_ptype_attrib_info(enum ice_ptype_attrib_type type,
+			  struct ice_ptype_attrib_info *info)
+{
+	*info = ice_ptype_attributes[type];
+}
+
+/**
+ * ice_add_prof_attrib - add any PTG with attributes to profile
+ * @prof: pointer to the profile to which PTG entries will be added
+ * @ptg: PTG to be added
+ * @ptype: PTYPE that needs to be looked up
+ * @attr: array of attributes that will be considered
+ * @attr_cnt: number of elements in the attribute array
+ */
+static enum ice_status
+ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype,
+		    const struct ice_ptype_attributes *attr, u16 attr_cnt)
+{
+	bool found = false;
+	u16 i;
+
+	for (i = 0; i < attr_cnt; i++)
+		if (attr[i].ptype == ptype) {
+			found = true;
+
+			prof->ptg[prof->ptg_cnt] = ptg;
+			ice_get_ptype_attrib_info(attr[i].attrib,
+						  &prof->attr[prof->ptg_cnt]);
+
+			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+				return ICE_ERR_MAX_LIMIT;
+		}
+
+	if (!found)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	return 0;
+}
+
 /**
  * ice_add_prof - add profile
  * @hw: pointer to the HW struct
  * @blk: hardware block
  * @id: profile tracking ID
  * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
+ * @attr: array of attributes
+ * @attr_cnt: number of elements in attr array
  * @es: extraction sequence (length of array is determined by the block)
+ * @masks: mask for extraction sequence
  *
- * This function registers a profile, which matches a set of PTGs with a
+ * This function registers a profile, which matches a set of PTYPES with a
  * particular extraction sequence. While the hardware profile is allocated
  * it will not be written until the first call to ice_add_flow that specifies
  * the ID value used here.
  */
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
-	     struct ice_fv_word *es)
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
+	     struct ice_fv_word *es, u16 *masks)
 {
 	u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
 	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
@@ -3740,7 +4201,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 	mutex_lock(&hw->blk[blk].es.prof_map_lock);
 
 	/* search for existing profile */
-	status = ice_find_prof_id(hw, blk, es, &prof_id);
+	status = ice_find_prof_id_with_mask(hw, blk, es, masks, &prof_id);
 	if (status) {
 		/* allocate profile ID */
 		status = ice_alloc_prof_id(hw, blk, &prof_id);
@@ -3758,6 +4219,9 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 			if (status)
 				goto err_ice_add_prof;
 		}
+		status = ice_update_prof_masking(hw, blk, prof_id, masks);
+		if (status)
+			goto err_ice_add_prof;
 
 		/* and write new es */
 		ice_write_es(hw, blk, prof_id, es);
@@ -3792,7 +4256,6 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 				 BITS_PER_BYTE) {
 			u16 ptype;
 			u8 ptg;
-			u8 m;
 
 			ptype = byte * BITS_PER_BYTE + bit;
 
@@ -3807,15 +4270,25 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 				continue;
 
 			set_bit(ptg, ptgs_used);
-			prof->ptg[prof->ptg_cnt] = ptg;
-
-			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+			/* Check to see there are any attributes for
+			 * this PTYPE, and add them if found.
+			 */
+			status = ice_add_prof_attrib(prof, ptg, ptype,
+						     attr, attr_cnt);
+			if (status == ICE_ERR_MAX_LIMIT)
 				break;
+			if (status) {
+				/* This is simple a PTYPE/PTG with no
+				 * attribute
+				 */
+				prof->ptg[prof->ptg_cnt] = ptg;
+				prof->attr[prof->ptg_cnt].flags = 0;
+				prof->attr[prof->ptg_cnt].mask = 0;
 
-			/* nothing left in byte, then exit */
-			m = ~(u8)((1 << (bit + 1)) - 1);
-			if (!(ptypes[byte] & m))
-				break;
+				if (++prof->ptg_cnt >=
+				    ICE_MAX_PTG_PER_PROFILE)
+					break;
+			}
 		}
 
 		bytes--;
@@ -4326,7 +4799,12 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
 	}
 
 	/* for re-enabling, reallocate a TCAM */
-	status = ice_alloc_tcam_ent(hw, blk, &tcam->tcam_idx);
+	/* for entries with empty attribute masks, allocate entry from
+	 * the bottom of the TCAM table; otherwise, allocate from the
+	 * top of the table in order to give it higher priority
+	 */
+	status = ice_alloc_tcam_ent(hw, blk, tcam->attr.mask == 0,
+				    &tcam->tcam_idx);
 	if (status)
 		return status;
 
@@ -4336,8 +4814,8 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
 		return ICE_ERR_NO_MEMORY;
 
 	status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id,
-				      tcam->ptg, vsig, 0, 0, vl_msk, dc_msk,
-				      nm_msk);
+				      tcam->ptg, vsig, 0, tcam->attr.flags,
+				      vl_msk, dc_msk, nm_msk);
 	if (status)
 		goto err_ice_prof_tcam_ena_dis;
 
@@ -4485,7 +4963,12 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
 		}
 
 		/* allocate the TCAM entry index */
-		status = ice_alloc_tcam_ent(hw, blk, &tcam_idx);
+		/* for entries with empty attribute masks, allocate entry from
+		 * the bottom of the TCAM table; otherwise, allocate from the
+		 * top of the table in order to give it higher priority
+		 */
+		status = ice_alloc_tcam_ent(hw, blk, map->attr[i].mask == 0,
+					    &tcam_idx);
 		if (status) {
 			devm_kfree(ice_hw_to_dev(hw), p);
 			goto err_ice_add_prof_id_vsig;
@@ -4494,6 +4977,7 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
 		t->tcam[i].ptg = map->ptg[i];
 		t->tcam[i].prof_id = map->prof_id;
 		t->tcam[i].tcam_idx = tcam_idx;
+		t->tcam[i].attr = map->attr[i];
 		t->tcam[i].in_use = true;
 
 		p->type = ICE_TCAM_ADD;
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index 20deddb807c5..8a58e79729b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -27,7 +27,8 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
 
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
-	     struct ice_fv_word *es);
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
+	     struct ice_fv_word *es, u16 *masks);
 enum ice_status
 ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index 24063c1351b2..abc156ce9d8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -190,6 +190,64 @@ enum ice_sect {
 	ICE_SECT_COUNT
 };
 
+#define ICE_MAC_IPV4_GTPU_IPV4_FRAG	331
+#define ICE_MAC_IPV4_GTPU_IPV4_PAY	332
+#define ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY	333
+#define ICE_MAC_IPV4_GTPU_IPV4_TCP	334
+#define ICE_MAC_IPV4_GTPU_IPV4_ICMP	335
+#define ICE_MAC_IPV6_GTPU_IPV4_FRAG	336
+#define ICE_MAC_IPV6_GTPU_IPV4_PAY	337
+#define ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY	338
+#define ICE_MAC_IPV6_GTPU_IPV4_TCP	339
+#define ICE_MAC_IPV6_GTPU_IPV4_ICMP	340
+#define ICE_MAC_IPV4_GTPU_IPV6_FRAG	341
+#define ICE_MAC_IPV4_GTPU_IPV6_PAY	342
+#define ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY	343
+#define ICE_MAC_IPV4_GTPU_IPV6_TCP	344
+#define ICE_MAC_IPV4_GTPU_IPV6_ICMPV6	345
+#define ICE_MAC_IPV6_GTPU_IPV6_FRAG	346
+#define ICE_MAC_IPV6_GTPU_IPV6_PAY	347
+#define ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY	348
+#define ICE_MAC_IPV6_GTPU_IPV6_TCP	349
+#define ICE_MAC_IPV6_GTPU_IPV6_ICMPV6	350
+
+/* Attributes that can modify PTYPE definitions.
+ *
+ * These values will represent special attributes for PTYPEs, which will
+ * resolve into metadata packet flags definitions that can be used in the TCAM
+ * for identifying a PTYPE with specific characteristics.
+ */
+enum ice_ptype_attrib_type {
+	/* GTP PTYPEs */
+	ICE_PTYPE_ATTR_GTP_PDU_EH,
+	ICE_PTYPE_ATTR_GTP_SESSION,
+	ICE_PTYPE_ATTR_GTP_DOWNLINK,
+	ICE_PTYPE_ATTR_GTP_UPLINK,
+};
+
+struct ice_ptype_attrib_info {
+	u16 flags;
+	u16 mask;
+};
+
+/* TCAM flag definitions */
+#define ICE_GTP_PDU			BIT(14)
+#define ICE_GTP_PDU_LINK		BIT(13)
+
+/* GTP attributes */
+#define ICE_GTP_PDU_FLAG_MASK		(ICE_GTP_PDU)
+#define ICE_GTP_PDU_EH			ICE_GTP_PDU
+
+#define ICE_GTP_FLAGS_MASK		(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+#define ICE_GTP_SESSION			0
+#define ICE_GTP_DOWNLINK		ICE_GTP_PDU
+#define ICE_GTP_UPLINK			(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+
+struct ice_ptype_attributes {
+	u16 ptype;
+	enum ice_ptype_attrib_type attrib;
+};
+
 /* package labels */
 struct ice_label {
 	__le16 value;
@@ -335,6 +393,7 @@ struct ice_es {
 	u16 count;
 	u16 fvw;
 	u16 *ref_count;
+	u32 *mask_ena;
 	struct list_head prof_map;
 	struct ice_fv_word *t;
 	struct mutex prof_map_lock;	/* protect access to profiles list */
@@ -372,12 +431,14 @@ struct ice_prof_map {
 	u8 prof_id;
 	u8 ptg_cnt;
 	u8 ptg[ICE_MAX_PTG_PER_PROFILE];
+	struct ice_ptype_attrib_info attr[ICE_MAX_PTG_PER_PROFILE];
 };
 
 #define ICE_INVALID_TCAM	0xFFFF
 
 struct ice_tcam_inf {
 	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
 	u8 ptg;
 	u8 prof_id;
 	u8 in_use;
@@ -478,6 +539,21 @@ struct ice_prof_redir {
 	u16 count;
 };
 
+struct ice_mask {
+	u16 mask;	/* 16-bit mask */
+	u16 idx;	/* index */
+	u16 ref;	/* reference count */
+	u8 in_use;	/* non-zero if used */
+};
+
+struct ice_masks {
+	struct mutex lock; /* lock to protect this structure */
+	u16 first;	/* first mask owned by the PF */
+	u16 count;	/* number of masks owned by the PF */
+#define ICE_PROF_MASK_COUNT 32
+	struct ice_mask masks[ICE_PROF_MASK_COUNT];
+};
+
 /* Tables per block */
 struct ice_blk_info {
 	struct ice_xlt1 xlt1;
@@ -485,6 +561,7 @@ struct ice_blk_info {
 	struct ice_prof_tcam prof;
 	struct ice_prof_redir prof_redir;
 	struct ice_es es;
+	struct ice_masks masks;
 	u8 overwrite; /* set to true to allow overwrite of table entries */
 	u8 is_list_init;
 };
@@ -513,6 +590,7 @@ struct ice_chs_chg {
 	u16 vsig;
 	u16 orig_vsig;
 	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
 };
 
 #define ICE_FLOW_PTYPE_MAX		ICE_XLT1_CNT
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 89a0cef20506..8e8bfc6fa2b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -9,18 +9,50 @@ struct ice_flow_field_info {
 	enum ice_flow_seg_hdr hdr;
 	s16 off;	/* Offset from start of a protocol header, in bits */
 	u16 size;	/* Size of fields in bits */
+	u16 mask;	/* 16-bit mask for field */
 };
 
 #define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \
 	.hdr = _hdr, \
 	.off = (_offset_bytes) * BITS_PER_BYTE, \
 	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = 0, \
+}
+
+#define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \
+	.hdr = _hdr, \
+	.off = (_offset_bytes) * BITS_PER_BYTE, \
+	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = _mask, \
 }
 
 /* Table containing properties of supported protocol header fields */
 static const
 struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
+	/* Ether */
+	/* ICE_FLOW_FIELD_IDX_ETH_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ETH_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, ETH_ALEN, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_S_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 12, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_C_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 14, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_ETH_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)),
 	/* IPv4 / IPv6 */
+	/* ICE_FLOW_FIELD_IDX_IPV4_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV4, 0, 1, 0x00fc),
+	/* ICE_FLOW_FIELD_IDX_IPV6_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV6, 0, 1, 0x0ff0),
+	/* ICE_FLOW_FIELD_IDX_IPV4_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0xff00),
+	/* ICE_FLOW_FIELD_IDX_IPV4_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0xff00),
 	/* ICE_FLOW_FIELD_IDX_IPV4_SA */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
 	/* ICE_FLOW_FIELD_IDX_IPV4_DA */
@@ -42,21 +74,111 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
 	/* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_TCP_FLAGS */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1),
+	/* ARP */
+	/* ICE_FLOW_FIELD_IDX_ARP_SIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_DIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 24, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_SHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 8, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_DHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 18, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_OP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 6, sizeof(__be16)),
+	/* ICMP */
+	/* ICE_FLOW_FIELD_IDX_ICMP_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 0, 1),
+	/* ICE_FLOW_FIELD_IDX_ICMP_CODE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 1, 1),
 	/* GRE */
 	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
 			  sizeof_field(struct gre_full_hdr, key)),
+	/* GTP */
+	/* ICE_FLOW_FIELD_IDX_GTPC_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPC_TEID, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_IP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_IP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_EH, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_QFI */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16),
+			      0x3f00),
+	/* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)),
+	/* PPPoE */
+	/* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)),
+	/* PFCP */
+	/* ICE_FLOW_FIELD_IDX_PFCP_SEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PFCP_SESSION, 12, sizeof(__be64)),
+	/* L2TPv3 */
+	/* ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV3, 0, sizeof(__be32)),
+	/* ESP */
+	/* ICE_FLOW_FIELD_IDX_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ESP, 0, sizeof(__be32)),
+	/* AH */
+	/* ICE_FLOW_FIELD_IDX_AH_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)),
+	/* NAT_T_ESP */
+	/* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)),
 };
 
 /* Bitmaps indicating relevant packet types for a particular protocol header
  *
- * Packet types for packets with an Outer/First/Single IPv4 header
+ * Packet types for packets with an Outer/First/Single MAC header
+ */
+static const u32 ice_ptypes_mac_ofos[] = {
+	0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC VLAN header */
+static const u32 ice_ptypes_macvlan_il[] = {
+	0x00000000, 0xBC000000, 0x000001DF, 0xF0000000,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, does NOT
+ * include IPv4 other PTYPEs
  */
 static const u32 ice_ptypes_ipv4_ofos[] = {
 	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, includes
+ * IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos_all[] = {
+	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x83E0F800, 0x00000101,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -67,7 +189,7 @@ static const u32 ice_ptypes_ipv4_ofos[] = {
 static const u32 ice_ptypes_ipv4_il[] = {
 	0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B,
 	0x0000000E, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x001FF800, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -75,12 +197,28 @@ static const u32 ice_ptypes_ipv4_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
-/* Packet types for packets with an Outer/First/Single IPv6 header */
+/* Packet types for packets with an Outer/First/Single IPv6 header, does NOT
+ * include IPv6 other PTYPEs
+ */
 static const u32 ice_ptypes_ipv6_ofos[] = {
 	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00000000, 0x03F00000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, includes
+ * IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos_all[] = {
+	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -91,7 +229,7 @@ static const u32 ice_ptypes_ipv6_ofos[] = {
 static const u32 ice_ptypes_ipv6_il[] = {
 	0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000,
 	0x00000770, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FE00000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -111,6 +249,18 @@ static const u32 ice_ipv4_ofos_no_l4[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Outermost/First ARP header */
+static const u32 ice_ptypes_arp_of[] = {
+	0x00000800, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
 static const u32 ice_ipv4_il_no_l4[] = {
 	0x60000000, 0x18043008, 0x80000002, 0x6010c021,
@@ -153,7 +303,7 @@ static const u32 ice_ipv6_il_no_l4[] = {
 static const u32 ice_ptypes_udp_il[] = {
 	0x81000000, 0x20204040, 0x04000010, 0x80810102,
 	0x00000040, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00410000, 0x90842000, 0x00000007,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -165,7 +315,7 @@ static const u32 ice_ptypes_udp_il[] = {
 static const u32 ice_ptypes_tcp_il[] = {
 	0x04000000, 0x80810102, 0x10000040, 0x02040408,
 	0x00000102, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00820000, 0x21084000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -177,6 +327,18 @@ static const u32 ice_ptypes_tcp_il[] = {
 static const u32 ice_ptypes_sctp_il[] = {
 	0x08000000, 0x01020204, 0x20000081, 0x04080810,
 	0x00000204, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x01040000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ICMP header */
+static const u32 ice_ptypes_icmp_of[] = {
+	0x10000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -185,6 +347,18 @@ static const u32 ice_ptypes_sctp_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Innermost/Last ICMP header */
+static const u32 ice_ptypes_icmp_il[] = {
+	0x00000000, 0x02040408, 0x40000102, 0x08101020,
+	0x00000408, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x42108000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Packet types for packets with an Outermost/First GRE header */
 static const u32 ice_ptypes_gre_of[] = {
 	0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000,
@@ -197,6 +371,218 @@ static const u32 ice_ptypes_gre_of[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Innermost/Last MAC header */
+static const u32 ice_ptypes_mac_il[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC */
+static const u32 ice_ptypes_gtpc[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000180, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC with TEID */
+static const u32 ice_ptypes_gtpc_tid[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000060, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPU */
+static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_down[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_up[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+};
+
+static const u32 ice_ptypes_gtpu[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FFFFE00, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for PPPoE */
+static const u32 ice_ptypes_pppoe[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x03ffe000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP NODE header */
+static const u32 ice_ptypes_pfcp_node[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x80000000, 0x00000002,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP SESSION header */
+static const u32 ice_ptypes_pfcp_session[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000005,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for L2TPv3 */
+static const u32 ice_ptypes_l2tpv3[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000300,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for ESP */
+static const u32 ice_ptypes_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000003, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for AH */
+static const u32 ice_ptypes_ah[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x0000000C, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with NAT_T ESP header */
+static const u32 ice_ptypes_nat_t_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000030, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static const u32 ice_ptypes_mac_non_ip_ofos[] = {
+	0x00000846, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Manage parameters and info. used during the creation of a flow profile */
 struct ice_flow_prof_params {
 	enum ice_block blk;
@@ -208,12 +594,30 @@ struct ice_flow_prof_params {
 	 * This will give us the direction flags.
 	 */
 	struct ice_fv_word es[ICE_MAX_FV_WORDS];
+	/* attributes can be used to add attributes to a particular PTYPE */
+	const struct ice_ptype_attributes *attr;
+	u16 attr_cnt;
+
+	u16 mask[ICE_MAX_FV_WORDS];
 	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
 };
 
+#define ICE_FLOW_RSS_HDRS_INNER_MASK \
+	(ICE_FLOW_SEG_HDR_PPPOE | ICE_FLOW_SEG_HDR_GTPC | \
+	ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_GTPU | \
+	ICE_FLOW_SEG_HDR_PFCP_SESSION | ICE_FLOW_SEG_HDR_L2TPV3 | \
+	ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_AH | \
+	ICE_FLOW_SEG_HDR_NAT_T_ESP)
+
+#define ICE_FLOW_SEG_HDRS_L2_MASK	\
+	(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
 #define ICE_FLOW_SEG_HDRS_L3_MASK	\
-	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP)
 #define ICE_FLOW_SEG_HDRS_L4_MASK	\
+	(ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \
+	 ICE_FLOW_SEG_HDR_SCTP)
+/* mask for L4 protocols that are NOT part of IPv4/6 OTHER PTYPE groups */
+#define ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER	\
 	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
 
 /**
@@ -243,8 +647,11 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
 
 /* Sizes of fixed known protocol headers without header options */
 #define ICE_FLOW_PROT_HDR_SZ_MAC	14
+#define ICE_FLOW_PROT_HDR_SZ_MAC_VLAN	(ICE_FLOW_PROT_HDR_SZ_MAC + 2)
 #define ICE_FLOW_PROT_HDR_SZ_IPV4	20
 #define ICE_FLOW_PROT_HDR_SZ_IPV6	40
+#define ICE_FLOW_PROT_HDR_SZ_ARP	28
+#define ICE_FLOW_PROT_HDR_SZ_ICMP	8
 #define ICE_FLOW_PROT_HDR_SZ_TCP	20
 #define ICE_FLOW_PROT_HDR_SZ_UDP	8
 #define ICE_FLOW_PROT_HDR_SZ_SCTP	12
@@ -256,16 +663,27 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
  */
 static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
 {
-	u16 sz = ICE_FLOW_PROT_HDR_SZ_MAC;
+	u16 sz;
+
+	/* L2 headers */
+	sz = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_VLAN) ?
+		ICE_FLOW_PROT_HDR_SZ_MAC_VLAN : ICE_FLOW_PROT_HDR_SZ_MAC;
 
 	/* L3 headers */
 	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
 		sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
 	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
 		sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ARP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ARP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)
+		/* An L3 header is required if L4 is specified */
+		return 0;
 
 	/* L4 headers */
-	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ICMP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ICMP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
 		sz += ICE_FLOW_PROT_HDR_SZ_TCP;
 	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
 		sz += ICE_FLOW_PROT_HDR_SZ_UDP;
@@ -298,8 +716,39 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 
 		hdrs = prof->segs[i].hdrs;
 
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH) {
+			src = !i ? (const unsigned long *)ice_ptypes_mac_ofos :
+				(const unsigned long *)ice_ptypes_mac_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (i && hdrs & ICE_FLOW_SEG_HDR_VLAN) {
+			src = (const unsigned long *)ice_ptypes_macvlan_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (!i && hdrs & ICE_FLOW_SEG_HDR_ARP) {
+			bitmap_and(params->ptypes, params->ptypes,
+				   (const unsigned long *)ice_ptypes_arp_of,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
 		if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
-		    !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+		    (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv4_il :
+				(const unsigned long *)ice_ptypes_ipv4_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+			   (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv6_il :
+				(const unsigned long *)ice_ptypes_ipv6_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
 			src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 :
 				(const unsigned long *)ice_ipv4_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -310,7 +759,7 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
 		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
-			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
 			src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 :
 				(const unsigned long *)ice_ipv6_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -322,6 +771,20 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   ICE_FLOW_PTYPE_MAX);
 		}
 
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH_NON_IP) {
+			src = (const unsigned long *)ice_ptypes_mac_non_ip_ofos;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+		}
+
 		if (hdrs & ICE_FLOW_SEG_HDR_UDP) {
 			src = (const unsigned long *)ice_ptypes_udp_il;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -334,12 +797,89 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			src = (const unsigned long *)ice_ptypes_sctp_il;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_ICMP) {
+			src = !i ? (const unsigned long *)ice_ptypes_icmp_of :
+				(const unsigned long *)ice_ptypes_icmp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
 			if (!i) {
 				src = (const unsigned long *)ice_ptypes_gre_of;
 				bitmap_and(params->ptypes, params->ptypes,
 					   src, ICE_FLOW_PTYPE_MAX);
 			}
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC) {
+			src = (const unsigned long *)ice_ptypes_gtpc;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC_TEID) {
+			src = (const unsigned long *)ice_ptypes_gtpc_tid;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with downlink */
+			params->attr = ice_attr_gtpu_down;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with uplink */
+			params->attr = ice_attr_gtpu_up;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with Extension Header */
+			params->attr = ice_attr_gtpu_eh;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_L2TPV3) {
+			src = (const unsigned long *)ice_ptypes_l2tpv3;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_ESP) {
+			src = (const unsigned long *)ice_ptypes_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_AH) {
+			src = (const unsigned long *)ice_ptypes_ah;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_NAT_T_ESP) {
+			src = (const unsigned long *)ice_ptypes_nat_t_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_PFCP) {
+			if (hdrs & ICE_FLOW_SEG_HDR_PFCP_NODE)
+				src = (const unsigned long *)ice_ptypes_pfcp_node;
+			else
+				src = (const unsigned long *)ice_ptypes_pfcp_session;
+
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pfcp_node;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+
+			src = (const unsigned long *)ice_ptypes_pfcp_session;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
 		}
 	}
 
@@ -352,6 +892,7 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
  * @params: information about the flow to be processed
  * @seg: packet segment index of the field to be extracted
  * @fld: ID of field to be extracted
+ * @match: bit field of all fields
  *
  * This function determines the protocol ID, offset, and size of the given
  * field. It then allocates one or more extraction sequence entries for the
@@ -359,17 +900,73 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
  */
 static enum ice_status
 ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
-		    u8 seg, enum ice_flow_field fld)
+		    u8 seg, enum ice_flow_field fld, u64 match)
 {
+	enum ice_flow_field sib = ICE_FLOW_FIELD_IDX_MAX;
 	enum ice_prot_id prot_id = ICE_PROT_ID_INVAL;
 	u8 fv_words = hw->blk[params->blk].es.fvw;
 	struct ice_flow_fld_info *flds;
 	u16 cnt, ese_bits, i;
+	u16 sib_mask = 0;
+	u16 mask;
 	u16 off;
 
 	flds = params->prof->segs[seg].fields;
 
 	switch (fld) {
+	case ICE_FLOW_FIELD_IDX_ETH_DA:
+	case ICE_FLOW_FIELD_IDX_ETH_SA:
+	case ICE_FLOW_FIELD_IDX_S_VLAN:
+	case ICE_FLOW_FIELD_IDX_C_VLAN:
+		prot_id = seg == 0 ? ICE_PROT_MAC_OF_OR_S : ICE_PROT_MAC_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_ETH_TYPE:
+		prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV4_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV4_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV4_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV6_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV6_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV6_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
 	case ICE_FLOW_FIELD_IDX_IPV4_SA:
 	case ICE_FLOW_FIELD_IDX_IPV4_DA:
 		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
@@ -380,6 +977,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 		break;
 	case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT:
 	case ICE_FLOW_FIELD_IDX_TCP_DST_PORT:
+	case ICE_FLOW_FIELD_IDX_TCP_FLAGS:
 		prot_id = ICE_PROT_TCP_IL;
 		break;
 	case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT:
@@ -390,6 +988,49 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
 		prot_id = ICE_PROT_SCTP_IL;
 		break;
+	case ICE_FLOW_FIELD_IDX_GTPC_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_IP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_UP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_QFI:
+		/* GTP is accessed through UDP OF protocol */
+		prot_id = ICE_PROT_UDP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID:
+		prot_id = ICE_PROT_PPPOE;
+		break;
+	case ICE_FLOW_FIELD_IDX_PFCP_SEID:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID:
+		prot_id = ICE_PROT_L2TPV3;
+		break;
+	case ICE_FLOW_FIELD_IDX_ESP_SPI:
+		prot_id = ICE_PROT_ESP_F;
+		break;
+	case ICE_FLOW_FIELD_IDX_AH_SPI:
+		prot_id = ICE_PROT_ESP_2;
+		break;
+	case ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_ARP_SIP:
+	case ICE_FLOW_FIELD_IDX_ARP_DIP:
+	case ICE_FLOW_FIELD_IDX_ARP_SHA:
+	case ICE_FLOW_FIELD_IDX_ARP_DHA:
+	case ICE_FLOW_FIELD_IDX_ARP_OP:
+		prot_id = ICE_PROT_ARP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_ICMP_TYPE:
+	case ICE_FLOW_FIELD_IDX_ICMP_CODE:
+		/* ICMP type and code share the same extraction seq. entry */
+		prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ?
+				ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL;
+		sib = fld == ICE_FLOW_FIELD_IDX_ICMP_TYPE ?
+			ICE_FLOW_FIELD_IDX_ICMP_CODE :
+			ICE_FLOW_FIELD_IDX_ICMP_TYPE;
+		break;
 	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
 		prot_id = ICE_PROT_GRE_OF;
 		break;
@@ -407,6 +1048,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 		ICE_FLOW_FV_EXTRACT_SZ;
 	flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits);
 	flds[fld].xtrct.idx = params->es_cnt;
+	flds[fld].xtrct.mask = ice_flds_info[fld].mask;
 
 	/* Adjust the next field-entry index after accommodating the number of
 	 * entries this field consumes
@@ -416,24 +1058,34 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 
 	/* Fill in the extraction sequence entries needed for this field */
 	off = flds[fld].xtrct.off;
+	mask = flds[fld].xtrct.mask;
 	for (i = 0; i < cnt; i++) {
-		u8 idx;
-
-		/* Make sure the number of extraction sequence required
-		 * does not exceed the block's capability
+		/* Only consume an extraction sequence entry if there is no
+		 * sibling field associated with this field or the sibling entry
+		 * already extracts the word shared with this field.
 		 */
-		if (params->es_cnt >= fv_words)
-			return ICE_ERR_MAX_LIMIT;
+		if (sib == ICE_FLOW_FIELD_IDX_MAX ||
+		    flds[sib].xtrct.prot_id == ICE_PROT_ID_INVAL ||
+		    flds[sib].xtrct.off != off) {
+			u8 idx;
 
-		/* some blocks require a reversed field vector layout */
-		if (hw->blk[params->blk].es.reverse)
-			idx = fv_words - params->es_cnt - 1;
-		else
-			idx = params->es_cnt;
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= fv_words)
+				return ICE_ERR_MAX_LIMIT;
 
-		params->es[idx].prot_id = prot_id;
-		params->es[idx].off = off;
-		params->es_cnt++;
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = prot_id;
+			params->es[idx].off = off;
+			params->mask[idx] = mask | sib_mask;
+			params->es_cnt++;
+		}
 
 		off += ICE_FLOW_FV_EXTRACT_SZ;
 	}
@@ -533,14 +1185,15 @@ ice_flow_create_xtrct_seq(struct ice_hw *hw,
 	u8 i;
 
 	for (i = 0; i < prof->segs_cnt; i++) {
-		u8 j;
+		u64 match = params->prof->segs[i].match;
+		enum ice_flow_field j;
 
-		for_each_set_bit(j, (unsigned long *)&prof->segs[i].match,
+		for_each_set_bit(j, (unsigned long *)&match,
 				 ICE_FLOW_FIELD_IDX_MAX) {
-			status = ice_flow_xtract_fld(hw, params, i,
-						     (enum ice_flow_field)j);
+			status = ice_flow_xtract_fld(hw, params, i, j, match);
 			if (status)
 				return status;
+			clear_bit(j, (unsigned long *)&match);
 		}
 
 		/* Process raw matching bytes */
@@ -751,7 +1404,8 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
 
 	/* Add a HW profile for this flow profile */
 	status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes,
-			      params->es);
+			      params->attr, params->attr_cnt, params->es,
+			      params->mask);
 	if (status) {
 		ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
 		goto out;
@@ -1158,6 +1812,9 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
 	seg->raws_cnt++;
 }
 
+#define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \
+	(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
+
 #define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
 	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
 
@@ -1165,7 +1822,8 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
 	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
 
 #define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \
-	(ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
+	(ICE_FLOW_RSS_SEG_HDR_L2_MASKS | \
+	 ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
 	 ICE_FLOW_RSS_SEG_HDR_L4_MASKS)
 
 /**
@@ -1193,7 +1851,8 @@ ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
 
 	ICE_FLOW_SET_HDRS(segs, flow_hdr);
 
-	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS)
+	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS &
+	    ~ICE_FLOW_RSS_HDRS_INNER_MASK & ~ICE_FLOW_SEG_HDR_IPV_OTHER)
 		return ICE_ERR_PARAM;
 
 	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 829f90b1e998..eec9def8ffca 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -30,6 +30,80 @@
 #define ICE_HASH_UDP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT)
 #define ICE_HASH_UDP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT)
 
+#define ICE_FLOW_HASH_GTP_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID))
+
+#define ICE_FLOW_HASH_GTP_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID)
+#define ICE_FLOW_HASH_GTP_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_TEID)
+#define ICE_FLOW_HASH_GTP_U_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_EH_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_EH_QFI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_QFI))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_EH \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+#define ICE_FLOW_HASH_GTP_U_IPV6_EH \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID))
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID_ETH \
+	(ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_TCP_ID \
+	(ICE_FLOW_HASH_TCP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_UDP_ID \
+	(ICE_FLOW_HASH_UDP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+
+#define ICE_FLOW_HASH_PFCP_SEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID))
+#define ICE_FLOW_HASH_PFCP_IPV4_SEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_PFCP_SEID)
+#define ICE_FLOW_HASH_PFCP_IPV6_SEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_PFCP_SEID)
+
+#define ICE_FLOW_HASH_L2TPV3_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID))
+#define ICE_FLOW_HASH_L2TPV3_IPV4_SESS_ID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+#define ICE_FLOW_HASH_L2TPV3_IPV6_SESS_ID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+
+#define ICE_FLOW_HASH_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI))
+#define ICE_FLOW_HASH_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_ESP_SPI)
+#define ICE_FLOW_HASH_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_ESP_SPI)
+
+#define ICE_FLOW_HASH_AH_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI))
+#define ICE_FLOW_HASH_AH_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_AH_SPI)
+#define ICE_FLOW_HASH_AH_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_AH_SPI)
+
+#define ICE_FLOW_HASH_NAT_T_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI))
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+
 /* Protocol header fields within a packet segment. A segment consists of one or
  * more protocol headers that make up a logical group of protocol headers. Each
  * logical group of protocol headers encapsulates or is encapsulated using/by
@@ -38,16 +112,66 @@
  */
 enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_NONE		= 0x00000000,
+	ICE_FLOW_SEG_HDR_ETH		= 0x00000001,
+	ICE_FLOW_SEG_HDR_VLAN		= 0x00000002,
 	ICE_FLOW_SEG_HDR_IPV4		= 0x00000004,
 	ICE_FLOW_SEG_HDR_IPV6		= 0x00000008,
+	ICE_FLOW_SEG_HDR_ARP		= 0x00000010,
+	ICE_FLOW_SEG_HDR_ICMP		= 0x00000020,
 	ICE_FLOW_SEG_HDR_TCP		= 0x00000040,
 	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
 	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
 	ICE_FLOW_SEG_HDR_GRE		= 0x00000200,
+	ICE_FLOW_SEG_HDR_GTPC		= 0x00000400,
+	ICE_FLOW_SEG_HDR_GTPC_TEID	= 0x00000800,
+	ICE_FLOW_SEG_HDR_GTPU_IP	= 0x00001000,
+	ICE_FLOW_SEG_HDR_GTPU_EH	= 0x00002000,
+	ICE_FLOW_SEG_HDR_GTPU_DWN	= 0x00004000,
+	ICE_FLOW_SEG_HDR_GTPU_UP	= 0x00008000,
+	ICE_FLOW_SEG_HDR_PPPOE		= 0x00010000,
+	ICE_FLOW_SEG_HDR_PFCP_NODE	= 0x00020000,
+	ICE_FLOW_SEG_HDR_PFCP_SESSION	= 0x00040000,
+	ICE_FLOW_SEG_HDR_L2TPV3		= 0x00080000,
+	ICE_FLOW_SEG_HDR_ESP		= 0x00100000,
+	ICE_FLOW_SEG_HDR_AH		= 0x00200000,
+	ICE_FLOW_SEG_HDR_NAT_T_ESP	= 0x00400000,
+	ICE_FLOW_SEG_HDR_ETH_NON_IP	= 0x00800000,
+	/* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and
+	 * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs
+	 */
+	ICE_FLOW_SEG_HDR_IPV_OTHER      = 0x20000000,
 };
 
+/* These segments all have the same PTYPES, but are otherwise distinguished by
+ * the value of the gtp_eh_pdu and gtp_eh_pdu_link flags:
+ *
+ *                                gtp_eh_pdu     gtp_eh_pdu_link
+ * ICE_FLOW_SEG_HDR_GTPU_IP           0              0
+ * ICE_FLOW_SEG_HDR_GTPU_EH           1              don't care
+ * ICE_FLOW_SEG_HDR_GTPU_DWN          1              0
+ * ICE_FLOW_SEG_HDR_GTPU_UP           1              1
+ */
+#define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \
+			       ICE_FLOW_SEG_HDR_GTPU_EH | \
+			       ICE_FLOW_SEG_HDR_GTPU_DWN | \
+			       ICE_FLOW_SEG_HDR_GTPU_UP)
+#define ICE_FLOW_SEG_HDR_PFCP (ICE_FLOW_SEG_HDR_PFCP_NODE | \
+			       ICE_FLOW_SEG_HDR_PFCP_SESSION)
+
 enum ice_flow_field {
+	/* L2 */
+	ICE_FLOW_FIELD_IDX_ETH_DA,
+	ICE_FLOW_FIELD_IDX_ETH_SA,
+	ICE_FLOW_FIELD_IDX_S_VLAN,
+	ICE_FLOW_FIELD_IDX_C_VLAN,
+	ICE_FLOW_FIELD_IDX_ETH_TYPE,
 	/* L3 */
+	ICE_FLOW_FIELD_IDX_IPV4_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV6_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV4_TTL,
+	ICE_FLOW_FIELD_IDX_IPV4_PROT,
+	ICE_FLOW_FIELD_IDX_IPV6_TTL,
+	ICE_FLOW_FIELD_IDX_IPV6_PROT,
 	ICE_FLOW_FIELD_IDX_IPV4_SA,
 	ICE_FLOW_FIELD_IDX_IPV4_DA,
 	ICE_FLOW_FIELD_IDX_IPV6_SA,
@@ -59,9 +183,42 @@ enum ice_flow_field {
 	ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_TCP_FLAGS,
+	/* ARP */
+	ICE_FLOW_FIELD_IDX_ARP_SIP,
+	ICE_FLOW_FIELD_IDX_ARP_DIP,
+	ICE_FLOW_FIELD_IDX_ARP_SHA,
+	ICE_FLOW_FIELD_IDX_ARP_DHA,
+	ICE_FLOW_FIELD_IDX_ARP_OP,
+	/* ICMP */
+	ICE_FLOW_FIELD_IDX_ICMP_TYPE,
+	ICE_FLOW_FIELD_IDX_ICMP_CODE,
 	/* GRE */
 	ICE_FLOW_FIELD_IDX_GRE_KEYID,
-	/* The total number of enums must not exceed 64 */
+	/* GTPC_TEID */
+	ICE_FLOW_FIELD_IDX_GTPC_TEID,
+	/* GTPU_IP */
+	ICE_FLOW_FIELD_IDX_GTPU_IP_TEID,
+	/* GTPU_EH */
+	ICE_FLOW_FIELD_IDX_GTPU_EH_TEID,
+	ICE_FLOW_FIELD_IDX_GTPU_EH_QFI,
+	/* GTPU_UP */
+	ICE_FLOW_FIELD_IDX_GTPU_UP_TEID,
+	/* GTPU_DWN */
+	ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID,
+	/* PPPoE */
+	ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID,
+	/* PFCP */
+	ICE_FLOW_FIELD_IDX_PFCP_SEID,
+	/* L2TPv3 */
+	ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID,
+	/* ESP */
+	ICE_FLOW_FIELD_IDX_ESP_SPI,
+	/* AH */
+	ICE_FLOW_FIELD_IDX_AH_SPI,
+	/* NAT_T ESP */
+	ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+	 /* The total number of enums must not exceed 64 */
 	ICE_FLOW_FIELD_IDX_MAX
 };
 
@@ -138,6 +295,7 @@ struct ice_flow_seg_xtrct {
 	u16 off;	/* Starting offset of the field in header in bytes */
 	u8 idx;		/* Index of FV entry used */
 	u8 disp;	/* Displacement of field in bits fr. FV entry's start */
+	u16 mask;	/* Mask for field */
 };
 
 enum ice_flow_fld_match_type {
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 093a1818a392..67b5b9b9d009 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -306,8 +306,23 @@
 #define GLQF_FD_SIZE_FD_BSIZE_S			16
 #define GLQF_FD_SIZE_FD_BSIZE_M			ICE_M(0x7FFF, 16)
 #define GLQF_FDINSET(_i, _j)			(0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK(_i)				(0x00410800 + ((_i) * 4))
+#define GLQF_FDMASK_MAX_INDEX			31
+#define GLQF_FDMASK_MSK_INDEX_S			0
+#define GLQF_FDMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_FDMASK_MASK_S			16
+#define GLQF_FDMASK_MASK_M			ICE_M(0xFFFF, 16)
 #define GLQF_FDMASK_SEL(_i)			(0x00410400 + ((_i) * 4))
 #define GLQF_FDSWAP(_i, _j)			(0x00413000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_HMASK(_i)				(0x0040FC00 + ((_i) * 4))
+#define GLQF_HMASK_MAX_INDEX			31
+#define GLQF_HMASK_MSK_INDEX_S			0
+#define GLQF_HMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_HMASK_MASK_S			16
+#define GLQF_HMASK_MASK_M			ICE_M(0xFFFF, 16)
+#define GLQF_HMASK_SEL(_i)			(0x00410000 + ((_i) * 4))
+#define GLQF_HMASK_SEL_MAX_INDEX		127
+#define GLQF_HMASK_SEL_MASK_SEL_S		0
 #define PFQF_FD_ENA				0x0043A000
 #define PFQF_FD_ENA_FD_ENA_M			BIT(0)
 #define PFQF_FD_SIZE				0x00460100
@@ -369,6 +384,9 @@
 #define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
 #define VSIQF_FD_CNT_FD_GCNT_S			0
 #define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
+#define VSIQF_FD_CNT_FD_BCNT_S			16
+#define VSIQF_FD_CNT_FD_BCNT_M			ICE_M(0x3FFF, 16)
+#define VSIQF_FD_SIZE(_VSI)			(0x00462000 + ((_VSI) * 4))
 #define VSIQF_HKEY_MAX_INDEX			12
 #define VSIQF_HLUT_MAX_INDEX			15
 #define PFPM_APM				0x000B8080
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 4ec24c3e813f..21329ed3087e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -55,6 +55,7 @@ struct ice_fltr_desc {
 #define ICE_FXD_FLTR_QW0_COMP_REPORT_M	\
 				(0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
 #define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL	0x1ULL
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW		0x2ULL
 
 #define ICE_FXD_FLTR_QW0_FD_SPACE_S	14
 #define ICE_FXD_FLTR_QW0_FD_SPACE_M	(0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
@@ -128,6 +129,7 @@ struct ice_fltr_desc {
 #define ICE_FXD_FLTR_QW1_FDID_PRI_S	25
 #define ICE_FXD_FLTR_QW1_FDID_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
 #define ICE_FXD_FLTR_QW1_FDID_PRI_ONE	0x1ULL
+#define ICE_FXD_FLTR_QW1_FDID_PRI_THREE	0x3ULL
 
 #define ICE_FXD_FLTR_QW1_FDID_MDID_S	28
 #define ICE_FXD_FLTR_QW1_FDID_MDID_M	(0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
@@ -138,6 +140,26 @@ struct ice_fltr_desc {
 			(0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
 #define ICE_FXD_FLTR_QW1_FDID_ZERO	0x0ULL
 
+/* definition for FD filter programming status descriptor WB format */
+#define ICE_FXD_FLTR_WB_QW1_DD_S	0
+#define ICE_FXD_FLTR_WB_QW1_DD_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_DD_S)
+#define ICE_FXD_FLTR_WB_QW1_DD_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_S	1
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_M	\
+				(0x3ULL << ICE_FXD_FLTR_WB_QW1_PROG_ID_S)
+#define ICE_FXD_FLTR_WB_QW1_PROG_ADD	0x0ULL
+#define ICE_FXD_FLTR_WB_QW1_PROG_DEL	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_S	4
+#define ICE_FXD_FLTR_WB_QW1_FAIL_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S	5
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M	\
+				(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES	0x1ULL
+
 struct ice_rx_ptype_decoded {
 	u32 ptype:10;
 	u32 known:1;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 8d4e2ad4328d..c345432fac72 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -343,6 +343,9 @@ static int ice_vsi_clear(struct ice_vsi *vsi)
 	pf->vsi[vsi->idx] = NULL;
 	if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
 		pf->next_vsi = vsi->idx;
+	if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL &&
+	    vsi->vf_id != ICE_INVAL_VFID)
+		pf->next_vsi = vsi->idx;
 
 	ice_vsi_free_arrays(vsi);
 	mutex_unlock(&pf->sw_mutex);
@@ -454,8 +457,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		goto unlock_pf;
 	}
 
-	if (vsi->type == ICE_VSI_CTRL) {
-		/* Use the last VSI slot as the index for the control VSI */
+	if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) {
+		/* Use the last VSI slot as the index for PF control VSI */
 		vsi->idx = pf->num_alloc_vsi - 1;
 		pf->ctrl_vsi_idx = vsi->idx;
 		pf->vsi[vsi->idx] = vsi;
@@ -468,6 +471,9 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
 						 pf->next_vsi);
 	}
+
+	if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID)
+		pf->vf[vf_id].ctrl_vsi_idx = vsi->idx;
 	goto unlock_pf;
 
 err_rings:
@@ -506,7 +512,7 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi)
 	if (!b_val)
 		return -EPERM;
 
-	if (vsi->type != ICE_VSI_PF)
+	if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF))
 		return -EPERM;
 
 	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
@@ -517,6 +523,13 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi)
 	/* each VSI gets same "best_effort" quota */
 	vsi->num_bfltr = b_val;
 
+	if (vsi->type == ICE_VSI_VF) {
+		vsi->num_gfltr = 0;
+
+		/* each VSI gets same "best_effort" quota */
+		vsi->num_bfltr = b_val;
+	}
+
 	return 0;
 }
 
@@ -856,7 +869,8 @@ static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 	u8 dflt_q_group, dflt_q_prio;
 	u16 dflt_q, report_q, val;
 
-	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL)
+	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
+	    vsi->type != ICE_VSI_VF)
 		return;
 
 	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
@@ -1179,7 +1193,24 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 
 	num_q_vectors = vsi->num_q_vectors;
 	/* reserve slots from OS requested IRQs */
-	base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx);
+	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
+		struct ice_vf *vf;
+		int i;
+
+		ice_for_each_vf(pf, i) {
+			vf = &pf->vf[i];
+			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+				base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
+				break;
+			}
+		}
+		if (i == pf->num_alloc_vfs)
+			base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+					   ICE_RES_VF_CTRL_VEC_ID);
+	} else {
+		base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+				   vsi->idx);
+	}
 
 	if (base < 0) {
 		dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
@@ -2308,7 +2339,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	struct ice_vsi *vsi;
 	int ret, i;
 
-	if (vsi_type == ICE_VSI_VF)
+	if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
 		vsi = ice_vsi_alloc(pf, vsi_type, vf_id);
 	else
 		vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID);
@@ -2323,7 +2354,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	if (vsi->type == ICE_VSI_PF)
 		vsi->ethtype = ETH_P_PAUSE;
 
-	if (vsi->type == ICE_VSI_VF)
+	if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL)
 		vsi->vf_id = vf_id;
 
 	ice_alloc_fd_res(vsi);
@@ -2770,7 +2801,24 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	 * many interrupts each VF needs. SR-IOV MSIX resources are also
 	 * cleared in the same manner.
 	 */
-	if (vsi->type != ICE_VSI_VF) {
+	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
+		struct ice_vf *vf;
+		int i;
+
+		ice_for_each_vf(pf, i) {
+			vf = &pf->vf[i];
+			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI)
+				break;
+		}
+		if (i == pf->num_alloc_vfs) {
+			/* No other VFs left that have control VSI, reclaim SW
+			 * interrupts back to the common pool
+			 */
+			ice_free_res(pf->irq_tracker, vsi->base_vector,
+				     ICE_RES_VF_CTRL_VEC_ID);
+			pf->num_avail_sw_msix += vsi->num_q_vectors;
+		}
+	} else if (vsi->type != ICE_VSI_VF) {
 		/* reclaim SW interrupts back to the common pool */
 		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 		pf->num_avail_sw_msix += vsi->num_q_vectors;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2c23c8f468a5..f318d7f607e4 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1044,7 +1044,7 @@ struct ice_aq_task {
 };
 
 /**
- * ice_wait_for_aq_event - Wait for an AdminQ event from firmware
+ * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
  * @pf: pointer to the PF private structure
  * @opcode: the opcode to wait for
  * @timeout: how long to wait, in jiffies
@@ -2071,6 +2071,7 @@ static void ice_service_task(struct work_struct *work)
 	ice_process_vflr_event(pf);
 	ice_clean_mailboxq_subtask(pf);
 	ice_sync_arfs_fltrs(pf);
+	ice_flush_fdir_ctx(pf);
 	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
 	ice_service_task_complete(pf);
 
@@ -2082,6 +2083,7 @@ static void ice_service_task(struct work_struct *work)
 	    test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
 	    test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
 	    test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) ||
 	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
 		mod_timer(&pf->serv_tmr, jiffies);
 }
@@ -2220,8 +2222,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
-				       q_vector->name, q_vector);
+		if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID)
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       IRQF_SHARED, q_vector->name,
+					       q_vector);
+		else
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       0, q_vector->name, q_vector);
 		if (err) {
 			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
 				   err);
@@ -4314,7 +4321,7 @@ static void ice_set_wake(struct ice_pf *pf)
 }
 
 /**
- * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet
+ * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
  * @pf: pointer to the PF struct
  *
  * Issue firmware command to enable multicast magic wake, making
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index 7f4c1ec1eff2..199aa5b71540 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -13,6 +13,9 @@
 enum ice_prot_id {
 	ICE_PROT_ID_INVAL	= 0,
 	ICE_PROT_MAC_OF_OR_S	= 1,
+	ICE_PROT_MAC_IL		= 4,
+	ICE_PROT_ETYPE_OL	= 9,
+	ICE_PROT_ETYPE_IL	= 10,
 	ICE_PROT_IPV4_OF_OR_S	= 32,
 	ICE_PROT_IPV4_IL	= 33,
 	ICE_PROT_IPV6_OF_OR_S	= 40,
@@ -21,7 +24,14 @@ enum ice_prot_id {
 	ICE_PROT_UDP_OF		= 52,
 	ICE_PROT_UDP_IL_OR_S	= 53,
 	ICE_PROT_GRE_OF		= 64,
+	ICE_PROT_ESP_F		= 88,
+	ICE_PROT_ESP_2		= 89,
 	ICE_PROT_SCTP_IL	= 96,
+	ICE_PROT_ICMP_IL	= 98,
+	ICE_PROT_ICMPV6_IL	= 100,
+	ICE_PROT_PPPOE		= 103,
+	ICE_PROT_L2TPV3		= 104,
+	ICE_PROT_ARP_OF		= 118,
 	ICE_PROT_META_ID	= 255, /* when offset == metadata */
 	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2403cb38b93c..f890337cc24a 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1857,7 +1857,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 }
 
 /**
- * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry
+ * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
  * @pi: port information structure
  * @vsi_handle: software VSI handle
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index b91dcfd12727..3148e78adc36 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1098,6 +1098,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		dma_rmb();
 
 		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+			struct ice_vsi *ctrl_vsi = rx_ring->vsi;
+
+			if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
+			    ctrl_vsi->vf_id != ICE_INVAL_VFID)
+				ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
 			ice_put_rx_buf(rx_ring, NULL, 0);
 			cleaned_count++;
 			continue;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 02b12736ea80..207f6ee3a7f6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -143,6 +143,7 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	case ICE_RX_PTYPE_INNER_PROT_UDP:
 	case ICE_RX_PTYPE_INNER_PROT_SCTP:
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a6cb0c35748c..2893143d9e62 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -192,6 +192,24 @@ enum ice_fltr_ptype {
 	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
 	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
 	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_AH,
+	ICE_FLTR_PTYPE_NONF_IPV6_AH,
+	ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+	ICE_FLTR_PTYPE_NON_IP_L2,
 	ICE_FLTR_PTYPE_FRAG_IPV4,
 	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
 	ICE_FLTR_PTYPE_NONF_IPV6_TCP,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
new file mode 100644
index 000000000000..1f4ba38b1599
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -0,0 +1,2204 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+
+#define to_fltr_conf_from_desc(p) \
+	container_of(p, struct virtchnl_fdir_fltr_conf, input)
+
+#define ICE_FLOW_PROF_TYPE_S	0
+#define ICE_FLOW_PROF_TYPE_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_TYPE_S)
+#define ICE_FLOW_PROF_VSI_S	32
+#define ICE_FLOW_PROF_VSI_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_VSI_S)
+
+/* Flow profile ID format:
+ * [0:31] - flow type, flow + tun_offs
+ * [32:63] - VSI index
+ */
+#define ICE_FLOW_PROF_FD(vsi, flow, tun_offs) \
+	((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \
+	      (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M)))
+
+#define GTPU_TEID_OFFSET 4
+#define GTPU_EH_QFI_OFFSET 1
+#define GTPU_EH_QFI_MASK 0x3F
+#define PFCP_S_OFFSET 0
+#define PFCP_S_MASK 0x1
+#define PFCP_PORT_NR 8805
+
+#define FDIR_INSET_FLAG_ESP_S 0
+#define FDIR_INSET_FLAG_ESP_M BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_UDP BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_IPSEC (0ULL << FDIR_INSET_FLAG_ESP_S)
+
+enum ice_fdir_tunnel_type {
+	ICE_FDIR_TUNNEL_TYPE_NONE = 0,
+	ICE_FDIR_TUNNEL_TYPE_GTPU,
+	ICE_FDIR_TUNNEL_TYPE_GTPU_EH,
+};
+
+struct virtchnl_fdir_fltr_conf {
+	struct ice_fdir_fltr input;
+	enum ice_fdir_tunnel_type ttype;
+	u64 inset_flag;
+	u32 flow_id;
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ether[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_tcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_udp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_sctp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_tcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_udp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_sctp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu_eh[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_GTPU_EH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_l2tpv3[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_l2tpv3[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_ah[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_ah[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_nat_t_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_nat_t_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_pfcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_PFCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_pfcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_PFCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+struct virtchnl_fdir_pattern_match_item {
+	enum virtchnl_proto_hdr_type *list;
+	u64 input_set;
+	u64 *meta;
+};
+
+static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_os[] = {
+	{vc_pattern_ipv4,                     0,         NULL},
+	{vc_pattern_ipv4_tcp,                 0,         NULL},
+	{vc_pattern_ipv4_udp,                 0,         NULL},
+	{vc_pattern_ipv4_sctp,                0,         NULL},
+	{vc_pattern_ipv6,                     0,         NULL},
+	{vc_pattern_ipv6_tcp,                 0,         NULL},
+	{vc_pattern_ipv6_udp,                 0,         NULL},
+	{vc_pattern_ipv6_sctp,                0,         NULL},
+};
+
+static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_comms[] = {
+	{vc_pattern_ipv4,                     0,         NULL},
+	{vc_pattern_ipv4_tcp,                 0,         NULL},
+	{vc_pattern_ipv4_udp,                 0,         NULL},
+	{vc_pattern_ipv4_sctp,                0,         NULL},
+	{vc_pattern_ipv6,                     0,         NULL},
+	{vc_pattern_ipv6_tcp,                 0,         NULL},
+	{vc_pattern_ipv6_udp,                 0,         NULL},
+	{vc_pattern_ipv6_sctp,                0,         NULL},
+	{vc_pattern_ether,                    0,         NULL},
+	{vc_pattern_ipv4_gtpu,                0,         NULL},
+	{vc_pattern_ipv4_gtpu_eh,             0,         NULL},
+	{vc_pattern_ipv4_l2tpv3,              0,         NULL},
+	{vc_pattern_ipv6_l2tpv3,              0,         NULL},
+	{vc_pattern_ipv4_esp,                 0,         NULL},
+	{vc_pattern_ipv6_esp,                 0,         NULL},
+	{vc_pattern_ipv4_ah,                  0,         NULL},
+	{vc_pattern_ipv6_ah,                  0,         NULL},
+	{vc_pattern_ipv4_nat_t_esp,           0,         NULL},
+	{vc_pattern_ipv6_nat_t_esp,           0,         NULL},
+	{vc_pattern_ipv4_pfcp,                0,         NULL},
+	{vc_pattern_ipv6_pfcp,                0,         NULL},
+};
+
+struct virtchnl_fdir_inset_map {
+	enum virtchnl_proto_hdr_field field;
+	enum ice_flow_field fld;
+	u64 flag;
+	u64 mask;
+};
+
+static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
+	{VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, ICE_FLOW_FIELD_IDX_ETH_TYPE, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_IPSEC, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_UDP, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_AH_SPI, ICE_FLOW_FIELD_IDX_AH_SPI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID, ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_PFCP_S_FIELD, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+};
+
+/**
+ * ice_vc_fdir_param_check
+ * @vf: pointer to the VF structure
+ * @vsi_id: VF relative VSI ID
+ *
+ * Check for the valid VSI ID, PF's state and VF's state
+ *
+ * Return: 0 on success, and -EINVAL on error.
+ */
+static int
+ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EINVAL;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		return -EINVAL;
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
+		return -EINVAL;
+
+	if (vsi_id != vf->lan_vsi_num)
+		return -EINVAL;
+
+	if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
+		return -EINVAL;
+
+	if (!pf->vsi[vf->lan_vsi_idx])
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_vf_start_ctrl_vsi
+ * @vf: pointer to the VF structure
+ *
+ * Allocate ctrl_vsi for the first time and open the ctrl_vsi port for VF
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vf_start_ctrl_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *ctrl_vsi;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		return -EEXIST;
+
+	ctrl_vsi = ice_vf_ctrl_vsi_setup(vf);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Could not setup control VSI for VF %d\n",
+			vf->vf_id);
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "Could not open control VSI for VF %d\n",
+			vf->vf_id);
+		goto err_vsi_open;
+	}
+
+	return 0;
+
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[vf->ctrl_vsi_idx] = NULL;
+		vf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
+/**
+ * ice_vc_fdir_alloc_prof - allocate profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_alloc_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof) {
+		fdir->fdir_prof = devm_kcalloc(ice_pf_to_dev(vf->pf),
+					       ICE_FLTR_PTYPE_MAX,
+					       sizeof(*fdir->fdir_prof),
+					       GFP_KERNEL);
+		if (!fdir->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!fdir->fdir_prof[flow]) {
+		fdir->fdir_prof[flow] = devm_kzalloc(ice_pf_to_dev(vf->pf),
+						     sizeof(**fdir->fdir_prof),
+						     GFP_KERNEL);
+		if (!fdir->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_free_prof - free profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ */
+static void
+ice_vc_fdir_free_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	if (!fdir->fdir_prof[flow])
+		return;
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof[flow]);
+	fdir->fdir_prof[flow] = NULL;
+}
+
+/**
+ * ice_vc_fdir_free_prof_all - free all the profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_free_prof_all(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	enum ice_fltr_ptype flow;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_vc_fdir_free_prof(vf, flow);
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof);
+	fdir->fdir_prof = NULL;
+}
+
+/**
+ * ice_vc_fdir_parse_flow_fld
+ * @proto_hdr: virtual channel protocol filter header
+ * @conf: FDIR configuration for each filter
+ * @fld: field type array
+ * @fld_cnt: field counter
+ *
+ * Parse the virtual channel filter header and store them into field type array
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_flow_fld(struct virtchnl_proto_hdr *proto_hdr,
+			   struct virtchnl_fdir_fltr_conf *conf,
+			   enum ice_flow_field *fld, int *fld_cnt)
+{
+	struct virtchnl_proto_hdr hdr;
+	u32 i;
+
+	memcpy(&hdr, proto_hdr, sizeof(hdr));
+
+	for (i = 0; (i < ARRAY_SIZE(fdir_inset_map)) &&
+	     VIRTCHNL_GET_PROTO_HDR_FIELD(&hdr); i++)
+		if (VIRTCHNL_TEST_PROTO_HDR(&hdr, fdir_inset_map[i].field)) {
+			if (fdir_inset_map[i].mask &&
+			    ((fdir_inset_map[i].mask & conf->inset_flag) !=
+			     fdir_inset_map[i].flag))
+				continue;
+
+			fld[*fld_cnt] = fdir_inset_map[i].fld;
+			*fld_cnt += 1;
+			if (*fld_cnt >= ICE_FLOW_FIELD_IDX_MAX)
+				return -EINVAL;
+			VIRTCHNL_DEL_PROTO_HDR_FIELD(&hdr,
+						     fdir_inset_map[i].field);
+		}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_fld
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Parse the virtual channel add msg buffer's field vector and store them into
+ * flow's packet segment field
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_fld(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	struct virtchnl_fdir_rule *rule = &fltr->rule_cfg;
+	enum ice_flow_field fld[ICE_FLOW_FIELD_IDX_MAX];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct virtchnl_proto_hdrs *proto;
+	int fld_cnt = 0;
+	int i;
+
+	proto = &rule->proto_hdrs;
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		int ret;
+
+		ret = ice_vc_fdir_parse_flow_fld(hdr, conf, fld, &fld_cnt);
+		if (ret)
+			return ret;
+	}
+
+	if (fld_cnt == 0) {
+		dev_dbg(dev, "Empty input set for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < fld_cnt; i++)
+		ice_flow_set_fld(seg, fld[i],
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_hdr - config the flow's packet segment header
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	enum ice_fltr_ptype flow = conf->input.flow_type;
+	enum ice_fdir_tunnel_type ttype = conf->ttype;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH |
+					  ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else {
+			dev_dbg(dev, "Invalid tunnel type 0x%x for VF %d\n",
+				flow, vf->vf_id);
+			return -EINVAL;
+		}
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	default:
+		dev_dbg(dev, "Invalid flow type 0x%x for VF %d failed\n",
+			flow, vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof - remove profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ */
+static void
+ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_fd_hw_prof *vf_prof;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vf_vsi;
+	struct device *dev;
+	struct ice_hw *hw;
+	u64 prof_id;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	if (!fdir->fdir_prof || !fdir->fdir_prof[flow])
+		return;
+
+	vf_prof = fdir->fdir_prof[flow];
+
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vf_vsi) {
+		dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id);
+		return;
+	}
+
+	if (!fdir->prof_entry_cnt[flow][tun])
+		return;
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num,
+				   flow, tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	for (i = 0; i < fdir->prof_entry_cnt[flow][tun]; i++)
+		if (vf_prof->entry_h[i][tun]) {
+			u16 vsi_num = ice_get_hw_vsi_num(hw, vf_prof->vsi_h[i]);
+
+			ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, ICE_BLK_FD,
+					   vf_prof->entry_h[i][tun]);
+			vf_prof->entry_h[i][tun] = 0;
+		}
+
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	devm_kfree(dev, vf_prof->fdir_seg[tun]);
+	vf_prof->fdir_seg[tun] = NULL;
+
+	for (i = 0; i < vf_prof->cnt; i++)
+		vf_prof->vsi_h[i] = 0;
+
+	fdir->prof_entry_cnt[flow][tun] = 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof_all - remove profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+{
+	enum ice_fltr_ptype flow;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+	     flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		ice_vc_fdir_rem_prof(vf, flow, 0);
+		ice_vc_fdir_rem_prof(vf, flow, 1);
+	}
+}
+
+/**
+ * ice_vc_fdir_write_flow_prof
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @seg: array of one or more packet segments that describe the flow
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Write the flow's profile config and packet segment into the hardware
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow,
+			    struct ice_flow_seg_info *seg, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_vsi *vf_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *vf_prof;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	u64 prof_id;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vf_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	vf_prof = fdir->fdir_prof[flow];
+	old_seg = vf_prof->fdir_seg[tun];
+	if (old_seg) {
+		if (!memcmp(old_seg, seg, sizeof(*seg))) {
+			dev_dbg(dev, "Duplicated profile for VF %d!\n",
+				vf->vf_id);
+			return -EEXIST;
+		}
+
+		if (fdir->fdir_fltr_cnt[flow][tun]) {
+			ret = -EINVAL;
+			dev_dbg(dev, "Input set conflicts for VF %d\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		/* remove previously allocated profile */
+		ice_vc_fdir_rem_prof(vf, flow, tun);
+	}
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, flow,
+				   tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				   tun + 1, &prof);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Could not add VSI flow 0x%x for VF %d\n",
+			flow, vf->vf_id);
+		goto err_exit;
+	}
+
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				    vf_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry1_h);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Could not add flow 0x%x VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_prof;
+	}
+
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				    ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry2_h);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev,
+			"Could not add flow 0x%x Ctrl VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_entry_1;
+	}
+
+	vf_prof->fdir_seg[tun] = seg;
+	vf_prof->cnt = 0;
+	fdir->prof_entry_cnt[flow][tun] = 0;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry1_h;
+	vf_prof->vsi_h[vf_prof->cnt] = vf_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry2_h;
+	vf_prof->vsi_h[vf_prof->cnt] = ctrl_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	return 0;
+
+err_entry_1:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, vf_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+err_exit:
+	return ret;
+}
+
+/**
+ * ice_vc_fdir_config_input_set
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Config the input set type and value for virtual channel add msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			     struct virtchnl_fdir_fltr_conf *conf, int tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_flow_seg_info *seg;
+	enum ice_fltr_ptype flow;
+	int ret;
+
+	flow = input->flow_type;
+	ret = ice_vc_fdir_alloc_prof(vf, flow);
+	if (ret) {
+		dev_dbg(dev, "Alloc flow prof for VF %d failed\n", vf->vf_id);
+		return ret;
+	}
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	ret = ice_vc_fdir_set_flow_fld(vf, fltr, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow field for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_set_flow_hdr(vf, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow hdr for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_flow_prof(vf, flow, seg, tun);
+	if (ret == -EEXIST) {
+		devm_kfree(dev, seg);
+	} else if (ret) {
+		dev_dbg(dev, "Write flow profile for VF %d failed\n",
+			vf->vf_id);
+		goto err_exit;
+	}
+
+	return 0;
+
+err_exit:
+	devm_kfree(dev, seg);
+	return ret;
+}
+
+/**
+ * ice_vc_fdir_match_pattern
+ * @fltr: virtual channel add cmd buffer
+ * @type: virtual channel protocol filter header type
+ *
+ * Matching the header type by comparing fltr and type's value.
+ *
+ * Return: true on success, and false on error.
+ */
+static bool
+ice_vc_fdir_match_pattern(struct virtchnl_fdir_add *fltr,
+			  enum virtchnl_proto_hdr_type *type)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	int i = 0;
+
+	while ((i < proto->count) &&
+	       (*type == proto->proto_hdr[i].type) &&
+	       (*type != VIRTCHNL_PROTO_HDR_NONE)) {
+		type++;
+		i++;
+	}
+
+	return ((i == proto->count) && (*type == VIRTCHNL_PROTO_HDR_NONE));
+}
+
+/**
+ * ice_vc_fdir_get_pattern - get while list pattern
+ * @vf: pointer to the VF info
+ * @len: filter list length
+ *
+ * Return: pointer to allowed filter list
+ */
+static const struct virtchnl_fdir_pattern_match_item *
+ice_vc_fdir_get_pattern(struct ice_vf *vf, int *len)
+{
+	const struct virtchnl_fdir_pattern_match_item *item;
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	if (!strncmp(hw->active_pkg_name, "ICE COMMS Package",
+		     sizeof(hw->active_pkg_name))) {
+		item = vc_fdir_pattern_comms;
+		*len = ARRAY_SIZE(vc_fdir_pattern_comms);
+	} else {
+		item = vc_fdir_pattern_os;
+		*len = ARRAY_SIZE(vc_fdir_pattern_os);
+	}
+
+	return item;
+}
+
+/**
+ * ice_vc_fdir_search_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ *
+ * Search for matched pattern from supported pattern list
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_search_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr)
+{
+	const struct virtchnl_fdir_pattern_match_item *pattern;
+	int len, i;
+
+	pattern = ice_vc_fdir_get_pattern(vf, &len);
+
+	for (i = 0; i < len; i++)
+		if (ice_vc_fdir_match_pattern(fltr, pattern[i].list))
+			return 0;
+
+	return -EINVAL;
+}
+
+/**
+ * ice_vc_fdir_parse_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's pattern and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE;
+	enum virtchnl_proto_hdr_type l4 = VIRTCHNL_PROTO_HDR_NONE;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	if (proto->count > VIRTCHNL_MAX_NUM_PROTO_HDRS) {
+		dev_dbg(dev, "Invalid protocol count:0x%x for VF %d\n",
+			proto->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		struct ip_esp_hdr *esph;
+		struct ip_auth_hdr *ah;
+		struct sctphdr *sctph;
+		struct ipv6hdr *ip6h;
+		struct udphdr *udph;
+		struct tcphdr *tcph;
+		struct ethhdr *eth;
+		struct iphdr *iph;
+		u8 s_field;
+		u8 *rawh;
+
+		switch (hdr->type) {
+		case VIRTCHNL_PROTO_HDR_ETH:
+			eth = (struct ethhdr *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NON_IP_L2;
+
+			if (hdr->field_selector)
+				input->ext_data.ether_type = eth->h_proto;
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV4:
+			iph = (struct iphdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV4;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+
+			if (hdr->field_selector) {
+				input->ip.v4.src_ip = iph->saddr;
+				input->ip.v4.dst_ip = iph->daddr;
+				input->ip.v4.tos = iph->tos;
+				input->ip.v4.proto = iph->protocol;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV6:
+			ip6h = (struct ipv6hdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV6;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+
+			if (hdr->field_selector) {
+				memcpy(input->ip.v6.src_ip,
+				       ip6h->saddr.in6_u.u6_addr8,
+				       sizeof(ip6h->saddr));
+				memcpy(input->ip.v6.dst_ip,
+				       ip6h->daddr.in6_u.u6_addr8,
+				       sizeof(ip6h->daddr));
+				input->ip.v6.tc = ((u8)(ip6h->priority) << 4) |
+						  (ip6h->flow_lbl[0] >> 4);
+				input->ip.v6.proto = ip6h->nexthdr;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_TCP:
+			tcph = (struct tcphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = tcph->source;
+					input->ip.v4.dst_port = tcph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = tcph->source;
+					input->ip.v6.dst_port = tcph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_UDP:
+			udph = (struct udphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = udph->source;
+					input->ip.v4.dst_port = udph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = udph->source;
+					input->ip.v6.dst_port = udph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_SCTP:
+			sctph = (struct sctphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = sctph->source;
+					input->ip.v4.dst_port = sctph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = sctph->source;
+					input->ip.v6.dst_port = sctph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_L2TPV3:
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3;
+
+			if (hdr->field_selector)
+				input->l2tpv3_data.session_id = *((__be32 *)hdr->buffer);
+			break;
+		case VIRTCHNL_PROTO_HDR_ESP:
+			esph = (struct ip_esp_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+			    l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_ESP;
+
+			if (l4 == VIRTCHNL_PROTO_HDR_UDP)
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_UDP;
+			else
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_IPSEC;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = esph->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = esph->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_AH:
+			ah = (struct ip_auth_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_AH;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_AH;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = ah->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = ah->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_PFCP:
+			rawh = (u8 *)hdr->buffer;
+			s_field = (rawh[0] >> PFCP_S_OFFSET) & PFCP_S_MASK;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.dst_port = cpu_to_be16(PFCP_PORT_NR);
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.dst_port = cpu_to_be16(PFCP_PORT_NR);
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_IP:
+			rawh = (u8 *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
+
+			if (hdr->field_selector)
+				input->gtpu_data.teid = *(__be32 *)(&rawh[GTPU_TEID_OFFSET]);
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU;
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_EH:
+			rawh = (u8 *)hdr->buffer;
+
+			if (hdr->field_selector)
+				input->gtpu_data.qfi = rawh[GTPU_EH_QFI_OFFSET] & GTPU_EH_QFI_MASK;
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU_EH;
+			break;
+		default:
+			dev_dbg(dev, "Invalid header type 0x:%x for VF %d\n",
+				hdr->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_parse_action
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's action and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_filter_action_set *as = &fltr->rule_cfg.action_set;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	u32 dest_num = 0;
+	u32 mark_num = 0;
+	int i;
+
+	if (as->count > VIRTCHNL_MAX_NUM_ACTIONS) {
+		dev_dbg(dev, "Invalid action numbers:0x%x for VF %d\n",
+			as->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < as->count; i++) {
+		struct virtchnl_filter_action *action = &as->actions[i];
+
+		switch (action->type) {
+		case VIRTCHNL_ACTION_PASSTHRU:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER;
+			break;
+		case VIRTCHNL_ACTION_DROP:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+			break;
+		case VIRTCHNL_ACTION_QUEUE:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+			input->q_index = action->act_conf.queue.index;
+			break;
+		case VIRTCHNL_ACTION_Q_REGION:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP;
+			input->q_index = action->act_conf.queue.index;
+			input->q_region = action->act_conf.queue.region;
+			break;
+		case VIRTCHNL_ACTION_MARK:
+			mark_num++;
+			input->fltr_id = action->act_conf.mark_id;
+			input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+			break;
+		default:
+			dev_dbg(dev, "Invalid action type:0x%x for VF %d\n",
+				action->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	if (dest_num == 0 || dest_num >= 2) {
+		dev_dbg(dev, "Invalid destination action for VF %d\n",
+			vf->vf_id);
+		return -EINVAL;
+	}
+
+	if (mark_num >= 2) {
+		dev_dbg(dev, "Too many mark actions for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_validate_fdir_fltr - validate the virtual channel filter
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	int ret;
+
+	ret = ice_vc_fdir_search_pattern(vf, fltr);
+	if (ret)
+		return ret;
+
+	ret = ice_vc_fdir_parse_pattern(vf, fltr, conf);
+	if (ret)
+		return ret;
+
+	return ice_vc_fdir_parse_action(vf, fltr, conf);
+}
+
+/**
+ * ice_vc_fdir_comp_rules - compare if two filter rules have the same value
+ * @conf_a: FDIR configuration for filter a
+ * @conf_b: FDIR configuration for filter b
+ *
+ * Return: 0 on success, and other on error.
+ */
+static bool
+ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
+		       struct virtchnl_fdir_fltr_conf *conf_b)
+{
+	struct ice_fdir_fltr *a = &conf_a->input;
+	struct ice_fdir_fltr *b = &conf_b->input;
+
+	if (conf_a->ttype != conf_b->ttype)
+		return false;
+	if (a->flow_type != b->flow_type)
+		return false;
+	if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
+		return false;
+	if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
+		return false;
+	if (memcmp(&a->gtpu_data, &b->gtpu_data, sizeof(a->gtpu_data)))
+		return false;
+	if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
+		return false;
+	if (memcmp(&a->l2tpv3_data, &b->l2tpv3_data, sizeof(a->l2tpv3_data)))
+		return false;
+	if (memcmp(&a->l2tpv3_mask, &b->l2tpv3_mask, sizeof(a->l2tpv3_mask)))
+		return false;
+	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
+		return false;
+	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_fdir_is_dup_fltr
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if there is duplicated rule with same conf value
+ *
+ * Return: 0 true success, and false on error.
+ */
+static bool
+ice_vc_fdir_is_dup_fltr(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct ice_fdir_fltr *desc;
+	bool ret;
+
+	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+		struct virtchnl_fdir_fltr_conf *node =
+				to_fltr_conf_from_desc(desc);
+
+		ret = ice_vc_fdir_comp_rules(node, conf);
+		if (ret)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_fdir_insert_entry
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: pointer to ID value allocated by driver
+ *
+ * Insert FDIR conf entry into list and allocate ID for this filter
+ *
+ * Return: 0 true success, and other on error.
+ */
+static int
+ice_vc_fdir_insert_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 *id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	/* alloc ID corresponding with conf */
+	i = idr_alloc(&vf->fdir.fdir_rule_idr, conf, 0,
+		      ICE_FDIR_MAX_FLTRS, GFP_KERNEL);
+	if (i < 0)
+		return -EINVAL;
+	*id = i;
+
+	list_add(&input->fltr_node, &vf->fdir.fdir_rule_list);
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_remove_entry - remove FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: filter rule's ID
+ */
+static void
+ice_vc_fdir_remove_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+
+	idr_remove(&vf->fdir.fdir_rule_idr, id);
+	list_del(&input->fltr_node);
+}
+
+/**
+ * ice_vc_fdir_lookup_entry - lookup FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @id: filter rule's ID
+ *
+ * Return: NULL on error, and other on success.
+ */
+static struct virtchnl_fdir_fltr_conf *
+ice_vc_fdir_lookup_entry(struct ice_vf *vf, u32 id)
+{
+	return idr_find(&vf->fdir.fdir_rule_idr, id);
+}
+
+/**
+ * ice_vc_fdir_flush_entry - remove all FDIR conf entry
+ * @vf: pointer to the VF info
+ */
+static void ice_vc_fdir_flush_entry(struct ice_vf *vf)
+{
+	struct virtchnl_fdir_fltr_conf *conf;
+	struct ice_fdir_fltr *desc, *temp;
+
+	list_for_each_entry_safe(desc, temp,
+				 &vf->fdir.fdir_rule_list, fltr_node) {
+		conf = to_fltr_conf_from_desc(desc);
+		list_del(&desc->fltr_node);
+		devm_kfree(ice_pf_to_dev(vf->pf), conf);
+	}
+}
+
+/**
+ * ice_vc_fdir_write_fltr - write filter rule into hardware
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @add: true implies add rule, false implies del rules
+ * @is_tun: false implies non-tunnel type filter, true implies tunnel filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vc_fdir_write_fltr(struct ice_vf *vf,
+				  struct virtchnl_fdir_fltr_conf *conf,
+				  bool add, bool is_tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct ice_vsi *vsi, *ctrl_vsi;
+	struct ice_fltr_desc desc;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int ret;
+	u8 *pkt;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	input->dest_vsi = vsi->idx;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Invalid ctrl_vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	status = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n",
+			vf->vf_id, input->flow_type);
+		goto err_free_pkt;
+	}
+
+	ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (ret)
+		goto err_free_pkt;
+
+	return 0;
+
+err_free_pkt:
+	devm_kfree(dev, pkt);
+	return ret;
+}
+
+/**
+ * ice_vf_fdir_timer - FDIR program waiting timer interrupt handler
+ * @t: pointer to timer_list
+ */
+static void ice_vf_fdir_timer(struct timer_list *t)
+{
+	struct ice_vf_fdir_ctx *ctx_irq = from_timer(ctx_irq, t, rx_tmr);
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct ice_vf *vf;
+	struct ice_pf *pf;
+
+	fdir = container_of(ctx_irq, struct ice_vf_fdir, ctx_irq);
+	vf = container_of(fdir, struct ice_vf, fdir);
+	ctx_done = &fdir->ctx_done;
+	pf = vf->pf;
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_TIMEOUT;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vc_fdir_irq_handler - ctrl_vsi Rx queue interrupt handler
+ * @ctrl_vsi: pointer to a VF's CTRL VSI
+ * @rx_desc: pointer to FDIR Rx queue descriptor
+ */
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc)
+{
+	struct ice_pf *pf = ctrl_vsi->back;
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir_ctx *ctx_irq;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	vf = &pf->vf[ctrl_vsi->vf_id];
+
+	fdir = &vf->fdir;
+	ctx_done = &fdir->ctx_done;
+	ctx_irq = &fdir->ctx_irq;
+	dev = ice_pf_to_dev(pf);
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_IRQ;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc));
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	ret = del_timer(&ctx_irq->rx_tmr);
+	if (!ret)
+		dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id);
+
+	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vf_fdir_dump_info - dump FDIR information for diagnosis
+ * @vf: pointer to the VF info
+ */
+static void ice_vf_fdir_dump_info(struct ice_vf *vf)
+{
+	struct ice_vsi *vf_vsi;
+	u32 fd_size, fd_cnt;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u16 vsi_num;
+
+	pf = vf->pf;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx);
+
+	fd_size = rd32(hw, VSIQF_FD_SIZE(vsi_num));
+	fd_cnt = rd32(hw, VSIQF_FD_CNT(vsi_num));
+	dev_dbg(dev, "VF %d: space allocated: guar:0x%x, be:0x%x, space consumed: guar:0x%x, be:0x%x",
+		vf->vf_id,
+		(fd_size & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_size & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S);
+}
+
+/**
+ * ice_vf_verify_rx_desc - verify received FDIR programming status descriptor
+ * @vf: pointer to the VF info
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+		      enum virtchnl_fdir_prgm_status *status)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u32 stat_err, error, prog_id;
+	int ret;
+
+	stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0);
+	if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >>
+	    ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) {
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id);
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >>
+		ICE_FXD_FLTR_WB_QW1_PROG_ID_S;
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD &&
+	    ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show add, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_DEL &&
+	    ctx->v_opcode != VIRTCHNL_OP_DEL_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show del, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) {
+		if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) {
+			dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		} else {
+			dev_err(dev, "VF %d, Failed to remove FDIR rule, attempt to remove non-existent entry",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		}
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) {
+		dev_err(dev, "VF %d: Profile matching error", vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	*status = VIRTCHNL_FDIR_SUCCESS;
+
+	return 0;
+
+err_exit:
+	ice_vf_fdir_dump_info(vf);
+	return ret;
+}
+
+/**
+ * ice_vc_add_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director add command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_add *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	resp->flow_id = conf->flow_id;
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director del command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_del *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	devm_kfree(dev, conf);
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	if (success)
+		devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_flush_fdir_ctx
+ * @pf: pointer to the PF structure
+ *
+ * Flush all the pending event on ctx_done list and process them.
+ */
+void ice_flush_fdir_ctx(struct ice_pf *pf)
+{
+	int i;
+
+	if (!test_and_clear_bit(__ICE_FD_VF_FLUSH_CTX, pf->state))
+		return;
+
+	ice_for_each_vf(pf, i) {
+		struct device *dev = ice_pf_to_dev(pf);
+		enum virtchnl_fdir_prgm_status status;
+		struct ice_vf *vf = &pf->vf[i];
+		struct ice_vf_fdir_ctx *ctx;
+		unsigned long flags;
+		int ret;
+
+		if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		if (vf->ctrl_vsi_idx == ICE_NO_VSI)
+			continue;
+
+		ctx = &vf->fdir.ctx_done;
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		if (!(ctx->flags & ICE_VF_FDIR_CTX_VALID)) {
+			spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+		WARN_ON(ctx->stat == ICE_FDIR_CTX_READY);
+		if (ctx->stat == ICE_FDIR_CTX_TIMEOUT) {
+			status = VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT;
+			dev_err(dev, "VF %d: ctrl_vsi irq timeout\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		ret = ice_vf_verify_rx_desc(vf, ctx, &status);
+		if (ret)
+			goto err_exit;
+
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, true);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, true);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		continue;
+err_exit:
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, false);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, false);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+	}
+}
+
+/**
+ * ice_vc_fdir_set_irq_ctx - set FDIR context info for later IRQ handler
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @v_opcode: virtual channel operation code
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_irq_ctx(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf,
+			enum virtchnl_ops v_opcode)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vf_fdir_ctx *ctx;
+	unsigned long flags;
+
+	ctx = &vf->fdir.ctx_irq;
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	if ((vf->fdir.ctx_irq.flags & ICE_VF_FDIR_CTX_VALID) ||
+	    (vf->fdir.ctx_done.flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		dev_dbg(dev, "VF %d: Last request is still in progress\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	ctx->flags |= ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+	ctx->conf = conf;
+	ctx->v_opcode = v_opcode;
+	ctx->stat = ICE_FDIR_CTX_READY;
+	timer_setup(&ctx->rx_tmr, ice_vf_fdir_timer, 0);
+
+	mod_timer(&ctx->rx_tmr, round_jiffies(msecs_to_jiffies(10) + jiffies));
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_clear_irq_ctx - clear FDIR context info for IRQ handler
+ * @vf: pointer to the VF structure
+ *
+ * Return: 0 on success, and other on error.
+ */
+static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf)
+{
+	struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq;
+	unsigned long flags;
+
+	del_timer(&ctx->rx_tmr);
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+}
+
+/**
+ * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_add *fltr = (struct virtchnl_fdir_add *)msg;
+	struct virtchnl_fdir_add *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vf_start_ctrl_vsi(vf);
+	if (ret && (ret != -EEXIST)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_err(dev, "Init FDIR for VF %d failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	conf = devm_kzalloc(dev, sizeof(*conf), GFP_KERNEL);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc conf for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+	ret = ice_vc_validate_fdir_fltr(vf, fltr, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	if (fltr->validate_only) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_SUCCESS;
+		devm_kfree(dev, conf);
+		ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER,
+					    v_ret, (u8 *)stat, len);
+		goto exit;
+	}
+
+	ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT;
+		dev_err(dev, "VF %d: FDIR input set configure failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_is_dup_fltr(vf, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_EXIST;
+		dev_dbg(dev, "VF %d: duplicated FDIR rule detected\n",
+			vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: insert FDIR list failed\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_rem_entry;
+	}
+
+exit:
+	kfree(stat);
+	return ret;
+
+err_rem_entry:
+	ice_vc_fdir_clear_irq_ctx(vf);
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+err_free_conf:
+	devm_kfree(dev, conf);
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg;
+	struct virtchnl_fdir_del *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+
+	conf = ice_vc_fdir_lookup_entry(vf, fltr->flow_id);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		dev_dbg(dev, "VF %d: FDIR invalid flow_id:0x%X\n",
+			vf->vf_id, fltr->flow_id);
+		goto err_exit;
+	}
+
+	/* Just return failure when ctrl_vsi idx is invalid */
+	if (vf->ctrl_vsi_idx == ICE_NO_VSI) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "Invalid FDIR ctrl_vsi for VF %d\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_DEL_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_del_tmr;
+	}
+
+	kfree(stat);
+
+	return ret;
+
+err_del_tmr:
+	ice_vc_fdir_clear_irq_ctx(vf);
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vf_fdir_init - init FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_init(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	idr_init(&fdir->fdir_rule_idr);
+	INIT_LIST_HEAD(&fdir->fdir_rule_list);
+
+	spin_lock_init(&fdir->ctx_lock);
+	fdir->ctx_irq.flags = 0;
+	fdir->ctx_done.flags = 0;
+}
+
+/**
+ * ice_vf_fdir_exit - destroy FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_exit(struct ice_vf *vf)
+{
+	ice_vc_fdir_flush_entry(vf);
+	idr_destroy(&vf->fdir.fdir_rule_idr);
+	ice_vc_fdir_rem_prof_all(vf);
+	ice_vc_fdir_free_prof_all(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
new file mode 100644
index 000000000000..f4e629f4c09b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_FDIR_H_
+#define _ICE_VIRTCHNL_FDIR_H_
+
+struct ice_vf;
+struct ice_pf;
+
+enum ice_fdir_ctx_stat {
+	ICE_FDIR_CTX_READY,
+	ICE_FDIR_CTX_IRQ,
+	ICE_FDIR_CTX_TIMEOUT,
+};
+
+struct ice_vf_fdir_ctx {
+	struct timer_list rx_tmr;
+	enum virtchnl_ops v_opcode;
+	enum ice_fdir_ctx_stat stat;
+	union ice_32b_rx_flex_desc rx_desc;
+#define ICE_VF_FDIR_CTX_VALID		BIT(0)
+	u32 flags;
+
+	void *conf;
+};
+
+/* VF FDIR information structure */
+struct ice_vf_fdir {
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	struct ice_fd_hw_prof **fdir_prof;
+
+	struct idr fdir_rule_idr;
+	struct list_head fdir_rule_list;
+
+	spinlock_t ctx_lock; /* protects FDIR context info */
+	struct ice_vf_fdir_ctx ctx_irq;
+	struct ice_vf_fdir_ctx ctx_done;
+};
+
+#ifdef CONFIG_PCI_IOV
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg);
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg);
+void ice_vf_fdir_init(struct ice_vf *vf);
+void ice_vf_fdir_exit(struct ice_vf *vf);
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc);
+void ice_flush_fdir_ctx(struct ice_pf *pf);
+#else
+static inline void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { }
+static inline void ice_flush_fdir_ctx(struct ice_pf *pf) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VIRTCHNL_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 1f38a8d0c525..78679ece2e08 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -202,6 +202,25 @@ static void ice_vf_vsi_release(struct ice_vf *vf)
 }
 
 /**
+ * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access
+ * @vf: VF that control VSI is being invalidated on
+ */
+static void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->ctrl_vsi_idx = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it
+ * @vf: VF that control VSI is being released on
+ */
+static void ice_vf_ctrl_vsi_release(struct ice_vf *vf)
+{
+	ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]);
+	ice_vf_ctrl_invalidate_vsi(vf);
+}
+
+/**
  * ice_free_vf_res - Free a VF's resources
  * @vf: pointer to the VF info
  */
@@ -214,6 +233,10 @@ static void ice_free_vf_res(struct ice_vf *vf)
 	 * accessing the VF's VSI after it's freed or invalidated.
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+	ice_vf_fdir_exit(vf);
+	/* free VF control VSI */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
 
 	/* free VSI and disconnect it from the parent uplink */
 	if (vf->lan_vsi_idx != ICE_NO_VSI) {
@@ -560,6 +583,28 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
 }
 
 /**
+ * ice_vf_ctrl_vsi_setup - Set up a VF control VSI
+ * @vf: VF to setup control VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf)
+{
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n");
+		ice_vf_ctrl_invalidate_vsi(vf);
+	}
+
+	return vsi;
+}
+
+/**
  * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
  * @pf: pointer to PF structure
  * @vf: pointer to VF that the first MSIX vector index is being calculated for
@@ -1256,6 +1301,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v) {
 		vf = &pf->vf[v];
 
+		ice_vf_fdir_exit(vf);
+		/* clean VF control VSI when resetting VFs since it should be
+		 * setup only when VF creates its first FDIR rule.
+		 */
+		if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+			ice_vf_ctrl_invalidate_vsi(vf);
+
 		ice_vf_pre_vsi_rebuild(vf);
 		ice_vf_rebuild_vsi(vf);
 		ice_vf_post_vsi_rebuild(vf);
@@ -1374,6 +1426,13 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
 
+	ice_vf_fdir_exit(vf);
+	/* clean VF control VSI when resetting VF since it should be setup
+	 * only when VF creates its first FDIR rule.
+	 */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
+
 	ice_vf_pre_vsi_rebuild(vf);
 	ice_vf_rebuild_vsi_with_release(vf);
 	ice_vf_post_vsi_rebuild(vf);
@@ -1532,7 +1591,7 @@ teardown:
 }
 
 /**
- * ice_set_dflt_settings - set VF defaults during initialization/creation
+ * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation
  * @pf: PF holding reference to all VFs for default configuration
  */
 static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
@@ -1549,6 +1608,12 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
 		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
 		vf->spoofchk = true;
 		vf->num_vf_qs = pf->num_qps_per_vf;
+
+		/* ctrl_vsi_idx will be set to a valid value only when VF
+		 * creates its first fdir rule.
+		 */
+		ice_vf_ctrl_invalidate_vsi(vf);
+		ice_vf_fdir_init(vf);
 	}
 }
 
@@ -1848,7 +1913,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
  *
  * send msg to VF
  */
-static int
+int
 ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
 {
@@ -1996,6 +2061,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
 	}
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF;
+
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
 
@@ -2084,7 +2152,7 @@ static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
  *
  * check for the valid VSI ID
  */
-static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
 {
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
@@ -3816,6 +3884,12 @@ error_handler:
 	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
 		err = ice_vc_dis_vlan_stripping(vf);
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER:
+		err = ice_vc_add_fdir_fltr(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER:
+		err = ice_vc_del_fdir_fltr(vf, msg);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
@@ -4108,7 +4182,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
 }
 
 /**
- * ice_print_vfs_mdd_event - print VFs malicious driver detect event
+ * ice_print_vfs_mdd_events - print VFs malicious driver detect event
  * @pf: pointer to the PF structure
  *
  * Called from ice_handle_mdd_event to rate limit and print VFs MDD events.
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0f519fba3770..46abc5388fc7 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -4,6 +4,7 @@
 #ifndef _ICE_VIRTCHNL_PF_H_
 #define _ICE_VIRTCHNL_PF_H_
 #include "ice.h"
+#include "ice_virtchnl_fdir.h"
 
 /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
 #define ICE_MAX_VLAN_PER_VF		8
@@ -70,6 +71,8 @@ struct ice_vf {
 
 	u16 vf_id;			/* VF ID in the PF space */
 	u16 lan_vsi_idx;		/* index into PF struct */
+	u16 ctrl_vsi_idx;
+	struct ice_vf_fdir fdir;
 	/* first vector index of this VF in the PF space */
 	int first_vector_idx;
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
@@ -138,6 +141,11 @@ void
 ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
 void ice_print_vfs_mdd_events(struct ice_pf *pf);
 void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf);
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 9f94d9159acd..17ab8ef024ad 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -473,6 +473,14 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -480,10 +488,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
 		result = ice_xmit_xdp_buff(xdp, xdp_ring);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index 33cceb77e960..29383112bc19 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -441,7 +441,7 @@ out_no_read:
 }
 
 /**
- *  e1000_init_mbx_params_pf - set initial values for pf mailbox
+ *  igb_init_mbx_params_pf - set initial values for pf mailbox
  *  @hw: pointer to the HW structure
  *
  *  Initializes the hw->mbx struct to correct values for pf mailbox
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 8c8eb82e6272..a018000f7db9 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -836,6 +836,7 @@ s32 igb_copper_link_setup_igp(struct e1000_hw *hw)
 			break;
 		case e1000_ms_auto:
 			data &= ~CR_1000T_MS_ENABLE;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 28baf203459a..7545da216d8b 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2347,35 +2347,23 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 			IGB_TEST_LEN*ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, igb_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) {
-			memcpy(p, igb_gstrings_net_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_stats[i].stat_string);
+		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_net_stats[i].stat_string);
 		for (i = 0; i < adapter->num_tx_queues; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_restart", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_restart", i);
 		}
 		for (i = 0; i < adapter->num_rx_queues; i++) {
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_drops", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_csum_err", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_alloc_failed", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_drops", i);
+			ethtool_sprintf(&p, "rx_queue_%u_csum_err", i);
+			ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i);
 		}
 		/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
@@ -3022,6 +3010,7 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 		break;
 	case ETHTOOL_SRXCLSRLDEL:
 		ret = igb_del_ethtool_nfc_entry(adapter, cmd);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a45cd2b416c8..b83966aa6647 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2037,7 +2037,7 @@ static void igb_power_down_link(struct igb_adapter *adapter)
 }
 
 /**
- * Detect and switch function for Media Auto Sense
+ * igb_check_swap_media -  Detect and switch function for Media Auto Sense
  * @adapter: address of the board private structure
  **/
 static void igb_check_swap_media(struct igb_adapter *adapter)
@@ -3115,7 +3115,7 @@ static s32 igb_init_i2c(struct igb_adapter *adapter)
 		return 0;
 
 	/* Initialize the i2c bus which is controlled by the registers.
-	 * This bus will use the i2c_algo_bit structue that implements
+	 * This bus will use the i2c_algo_bit structure that implements
 	 * the protocol through toggling of the 4 bits in the register.
 	 */
 	adapter->i2c_adap.owner = THIS_MODULE;
@@ -4020,7 +4020,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
 }
 
 /**
- *  igb_open - Called when a network interface is made active
+ *  __igb_open - Called when a network interface is made active
  *  @netdev: network interface device structure
  *  @resuming: indicates whether we are in a resume call
  *
@@ -4138,7 +4138,7 @@ int igb_open(struct net_device *netdev)
 }
 
 /**
- *  igb_close - Disables a network interface
+ *  __igb_close - Disables a network interface
  *  @netdev: network interface device structure
  *  @suspending: indicates we are in a suspend call
  *
@@ -5856,7 +5856,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
 	 */
 	if (tx_ring->launchtime_enable) {
 		ts = ktime_to_timespec64(first->skb->tstamp);
-		first->skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(first->skb);
 		context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
 	} else {
 		context_desc->seqnum_seed = 0;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 86a576201f5f..ba61fe9bfaf4 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1025,6 +1025,7 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
 	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
 		tsync_tx_ctl = 0;
+		break;
 	case HWTSTAMP_TX_ON:
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index b909f00a79e6..35ed997af075 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -441,11 +441,6 @@
 #define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
 #define MII_CR_POWER_DOWN	0x0800  /* Power down */
 #define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
-#define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET		0x8000  /* 0 = normal, 1 = PHY reset */
-#define MII_CR_SPEED_1000	0x0040
-#define MII_CR_SPEED_100	0x2000
-#define MII_CR_SPEED_10		0x0000
 
 /* PHY Status Register */
 #define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index 7ec04e48860c..cc83bb5c15e8 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -6,7 +6,7 @@
 #include "igc_hw.h"
 
 /**
- * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
  * @hw: pointer to the HW structure
  *
  * Acquire the necessary semaphores for exclusive access to the EEPROM.
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 4d989ebc9713..baa45a1f3a65 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -941,7 +941,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
 		ktime_t txtime = first->skb->tstamp;
 
-		first->skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(first->skb);
 		context_desc->launch_time = igc_tx_launchtime(adapter,
 							      txtime);
 	} else {
@@ -3580,7 +3580,7 @@ void igc_up(struct igc_adapter *adapter)
 	netif_tx_start_all_queues(adapter->netdev);
 
 	/* start the watchdog. */
-	hw->mac.get_link_status = 1;
+	hw->mac.get_link_status = true;
 	schedule_work(&adapter->watchdog_task);
 }
 
@@ -4009,7 +4009,7 @@ static irqreturn_t igc_msix_other(int irq, void *data)
 	}
 
 	if (icr & IGC_ICR_LSC) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4387,7 +4387,7 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
 	}
 
 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
@@ -4429,7 +4429,7 @@ static irqreturn_t igc_intr(int irq, void *data)
 	}
 
 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4583,7 +4583,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
 	netif_tx_start_all_queues(netdev);
 
 	/* start the watchdog. */
-	hw->mac.get_link_status = 1;
+	hw->mac.get_link_status = true;
 	schedule_work(&adapter->watchdog_task);
 
 	return IGC_SUCCESS;
@@ -4924,7 +4924,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 {
 	struct igc_mac_info *mac = &adapter->hw.mac;
 
-	mac->autoneg = 0;
+	mac->autoneg = false;
 
 	/* Make sure dplx is at most 1 bit and lsb of speed is not set
 	 * for the switch() below to work
@@ -4946,13 +4946,13 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 		mac->forced_speed_duplex = ADVERTISE_100_FULL;
 		break;
 	case SPEED_1000 + DUPLEX_FULL:
-		mac->autoneg = 1;
+		mac->autoneg = true;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 	case SPEED_1000 + DUPLEX_HALF: /* not supported */
 		goto err_inval;
 	case SPEED_2500 + DUPLEX_FULL:
-		mac->autoneg = 1;
+		mac->autoneg = true;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
 		break;
 	case SPEED_2500 + DUPLEX_HALF: /* not supported */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 8d3798a32f0e..e324e42fab2d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1351,7 +1351,7 @@ static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
 }
 
 /**
- *  ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
+ *  ixgbe_fdir_add_signature_filter_82599 - Adds a signature hash filter
  *  @hw: pointer to hardware structure
  *  @input: unique input dword
  *  @common: compressed common input dword
@@ -1542,6 +1542,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
 	switch (input_mask->formatted.vm_pool & 0x7F) {
 	case 0x0:
 		fdirm |= IXGBE_FDIRM_POOL;
+		break;
 	case 0x7F:
 		break;
 	default:
@@ -1557,6 +1558,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
 			hw_dbg(hw, " Error on src/dst port mask\n");
 			return IXGBE_ERR_CONFIG;
 		}
+		break;
 	case IXGBE_ATR_L4TYPE_MASK:
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 62ddb452f862..03ccbe6b66d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -93,6 +93,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
@@ -2707,7 +2708,7 @@ s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_enable_rx_buff - Enables the receive data path
+ *  ixgbe_enable_rx_buff_generic - Enables the receive data path
  *  @hw: pointer to hardware structure
  *
  *  Enables the receive data path
@@ -3029,14 +3030,14 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
 }
 
 /**
+ *  ixgbe_set_vmdq_san_mac_generic - Associate VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @vmdq: VMDq pool index
+ *
  *  This function should only be involved in the IOV mode.
  *  In IOV mode, Default pool is next pool after the number of
  *  VFs advertized and not 0.
  *  MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
- *
- *  ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
- *  @hw: pointer to hardware struct
- *  @vmdq: VMDq pool index
  **/
 s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
 {
@@ -3896,7 +3897,7 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
 }
 
 /**
- *  ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ *  ixgbe_get_thermal_sensor_data_generic - Gathers thermal sensor data
  *  @hw: pointer to hardware structure
  *
  *  Returns the thermal sensor data structure
@@ -4054,8 +4055,7 @@ void ixgbe_get_orom_version(struct ixgbe_hw *hw,
 }
 
 /**
- *  ixgbe_get_oem_prod_version Etrack ID from EEPROM
- *
+ *  ixgbe_get_oem_prod_version - Etrack ID from EEPROM
  *  @hw: pointer to hardware structure
  *  @nvm_ver: pointer to output structure
  *
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index a280aa34ca1d..4ceaca0f6ce3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1368,45 +1368,33 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
 			      u8 *data)
 {
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_TEST:
-		for (i = 0; i < IXGBE_TEST_LEN; i++) {
-			memcpy(data, ixgbe_gstrings_test[i], ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IXGBE_TEST_LEN; i++)
+			ethtool_sprintf(&p, ixgbe_gstrings_test[i]);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, ixgbe_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ixgbe_gstrings_stats[i].stat_string);
 		for (i = 0; i < netdev->num_tx_queues; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
 		}
 		for (i = 0; i < IXGBE_NUM_RX_QUEUES; i++) {
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
 		}
 		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
-			sprintf(p, "tx_pb_%u_pxon", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_pb_%u_pxoff", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "tx_pb_%u_pxoff", i);
 		}
 		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
-			sprintf(p, "rx_pb_%u_pxon", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_pb_%u_pxoff", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "rx_pb_%u_pxoff", i);
 		}
 		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index df389a11d3af..0218f6c9b925 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -132,6 +132,7 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
 			else
 				*tx = (tc + 4) << 4;	/* 96, 112 */
 		}
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 03d9aad516d4..19fe21116fe8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -225,7 +225,7 @@ static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
 }
 
 /**
- * ixgbe_check_from_parent - Determine whether PCIe info should come from parent
+ * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent
  * @hw: hw specific details
  *
  * This function is used by probe to determine whether a device's PCI-Express
@@ -6158,7 +6158,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 }
 
 /**
- * ixgbe_eee_capable - helper function to determine EEE support on X550
+ * ixgbe_set_eee_capable - helper function to determine EEE support on X550
  * @adapter: board private structure
  */
 static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index fc389eecdd2b..73bc170d1ae9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -461,12 +461,13 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_read_phy_mdi - Reads a value from a specified PHY register without
- *  the SWFW lock
+ *  ixgbe_read_phy_reg_mdi - read PHY register
  *  @hw: pointer to hardware structure
  *  @reg_addr: 32 bit address of PHY register to read
  *  @device_type: 5 bit device type
  *  @phy_data: Pointer to read data from PHY register
+ *
+ *  Reads a value from a specified PHY register without the SWFW lock
  **/
 s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
 		       u16 *phy_data)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 22a874eee2e8..23ddfd79fc8b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -999,6 +999,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
 		tsync_tx_ctl = 0;
+		break;
 	case HWTSTAMP_TX_ON:
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 4b93ba149ec5..d5cfb51ff648 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -701,7 +701,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
 }
 
 /**
- * ixgbe_release_nvm_semaphore - Release hardware semaphore
+ * ixgbe_release_swfw_sync_semaphore - Release hardware semaphore
  * @hw: pointer to hardware structure
  *
  * This function clears hardware semaphore bits.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 5e339afa682a..9724ffb16518 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1248,7 +1248,7 @@ static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
 }
 
 /**
- * ixgbe_fw_recovery_mode - Check FW NVM recovery mode
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
  * @hw: pointer t hardware structure
  *
  * Returns true if in FW NVM recovery mode.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 3771857cf887..91ad5b902673 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -104,6 +104,13 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -115,10 +122,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 		}
 		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 449d7d5b280d..ba2ed8a43d2d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2633,6 +2633,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 			adapter->num_rx_queues = rss;
 			adapter->num_tx_queues = rss;
 			adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index bfe6dfcec4ab..5fc347abab3c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -121,9 +121,11 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 }
 
 /**
+ * ixgbevf_hv_reset_hw_vf - reset via Hyper-V
+ * @hw: pointer to private hardware struct
+ *
  * Hyper-V variant; the VF/PF communication is through the PCI
  * config space.
- * @hw: pointer to private hardware struct
  */
 static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
 {
@@ -513,9 +515,11 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_update_mc_addr_list_vf - stub
  * @hw: unused
  * @netdev: unused
+ *
+ * Hyper-V variant - just a stub.
  */
 static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 					     struct net_device *netdev)
@@ -564,9 +568,11 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_update_xcast_mode - stub
  * @hw: unused
  * @xcast_mode: unused
+ *
+ * Hyper-V variant - just a stub.
  */
 static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
 {
@@ -608,7 +614,7 @@ mbx_err:
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub.
  * @hw: unused
  * @vlan: unused
  * @vind: unused
@@ -726,11 +732,13 @@ out:
 }
 
 /**
- * Hyper-V variant; there is no mailbox communication.
+ * ixgbevf_hv_check_mac_link_vf - check link
  * @hw: pointer to private hardware struct
  * @speed: pointer to link speed
  * @link_up: true is link is up, false otherwise
  * @autoneg_wait_to_complete: unused
+ *
+ * Hyper-V variant; there is no mailbox communication.
  */
 static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw,
 					ixgbe_link_speed *speed,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 68deae529bc9..fac6474ad694 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -30,10 +30,35 @@
 static LIST_HEAD(cgx_list);
 
 /* Convert firmware speed encoding to user format(Mbps) */
-static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX];
+static const u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX] = {
+	[CGX_LINK_NONE] = 0,
+	[CGX_LINK_10M] = 10,
+	[CGX_LINK_100M] = 100,
+	[CGX_LINK_1G] = 1000,
+	[CGX_LINK_2HG] = 2500,
+	[CGX_LINK_5G] = 5000,
+	[CGX_LINK_10G] = 10000,
+	[CGX_LINK_20G] = 20000,
+	[CGX_LINK_25G] = 25000,
+	[CGX_LINK_40G] = 40000,
+	[CGX_LINK_50G] = 50000,
+	[CGX_LINK_80G] = 80000,
+	[CGX_LINK_100G] = 100000,
+};
 
 /* Convert firmware lmac type encoding to string */
-static char *cgx_lmactype_string[LMAC_MODE_MAX];
+static const char *cgx_lmactype_string[LMAC_MODE_MAX] = {
+	[LMAC_MODE_SGMII] = "SGMII",
+	[LMAC_MODE_XAUI] = "XAUI",
+	[LMAC_MODE_RXAUI] = "RXAUI",
+	[LMAC_MODE_10G_R] = "10G_R",
+	[LMAC_MODE_40G_R] = "40G_R",
+	[LMAC_MODE_QSGMII] = "QSGMII",
+	[LMAC_MODE_25G_R] = "25G_R",
+	[LMAC_MODE_50G_R] = "50G_R",
+	[LMAC_MODE_100G_R] = "100G_R",
+	[LMAC_MODE_USXGMII] = "USXGMII",
+};
 
 /* CGX PHY management internal APIs */
 static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en);
@@ -659,34 +684,6 @@ int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id)
 	return err;
 }
 
-static inline void cgx_link_usertable_init(void)
-{
-	cgx_speed_mbps[CGX_LINK_NONE] = 0;
-	cgx_speed_mbps[CGX_LINK_10M] = 10;
-	cgx_speed_mbps[CGX_LINK_100M] = 100;
-	cgx_speed_mbps[CGX_LINK_1G] = 1000;
-	cgx_speed_mbps[CGX_LINK_2HG] = 2500;
-	cgx_speed_mbps[CGX_LINK_5G] = 5000;
-	cgx_speed_mbps[CGX_LINK_10G] = 10000;
-	cgx_speed_mbps[CGX_LINK_20G] = 20000;
-	cgx_speed_mbps[CGX_LINK_25G] = 25000;
-	cgx_speed_mbps[CGX_LINK_40G] = 40000;
-	cgx_speed_mbps[CGX_LINK_50G] = 50000;
-	cgx_speed_mbps[CGX_LINK_80G] = 80000;
-	cgx_speed_mbps[CGX_LINK_100G] = 100000;
-
-	cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII";
-	cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI";
-	cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI";
-	cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R";
-	cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R";
-	cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII";
-	cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R";
-	cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R";
-	cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R";
-	cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII";
-}
-
 static int cgx_link_usertable_index_map(int speed)
 {
 	switch (speed) {
@@ -828,7 +825,7 @@ static inline void link_status_user_format(u64 lstat,
 					   struct cgx_link_user_info *linfo,
 					   struct cgx *cgx, u8 lmac_id)
 {
-	char *lmac_string;
+	const char *lmac_string;
 
 	linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
 	linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
@@ -1377,7 +1374,6 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	list_add(&cgx->cgx_list, &cgx_list);
 
-	cgx_link_usertable_init();
 
 	cgx_populate_features(cgx);
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index ea456099b33c..55629c66586e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -74,13 +74,13 @@ struct otx2_mbox {
 	struct otx2_mbox_dev *dev;
 };
 
-/* Header which preceeds all mbox messages */
+/* Header which precedes all mbox messages */
 struct mbox_hdr {
 	u64 msg_size;	/* Total msgs size embedded */
 	u16  num_msgs;   /* No of msgs embedded */
 };
 
-/* Header which preceeds every msg and is also part of it */
+/* Header which precedes every msg and is also part of it */
 struct mbox_msghdr {
 	u16 pcifunc;     /* Who's sending this msg */
 	u16 id;          /* Mbox message ID */
@@ -216,6 +216,9 @@ M(NPC_MCAM_READ_ENTRY,	  0x600f, npc_mcam_read_entry,			\
 				  npc_mcam_read_entry_rsp)		\
 M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule,            \
 				   msg_req, npc_mcam_read_base_rule_rsp)  \
+M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats,                     \
+				   npc_mcam_get_stats_req,              \
+				   npc_mcam_get_stats_rsp)              \
 /* NIX mbox IDs (range 0x8000 - 0xFFFF) */				\
 M(NIX_LF_ALLOC,		0x8000, nix_lf_alloc,				\
 				 nix_lf_alloc_req, nix_lf_alloc_rsp)	\
@@ -277,8 +280,8 @@ struct msg_req {
 	struct mbox_msghdr hdr;
 };
 
-/* Generic rsponse msg used a ack or response for those mbox
- * messages which doesn't have a specific rsp msg format.
+/* Generic response msg used an ack or response for those mbox
+ * messages which don't have a specific rsp msg format.
  */
 struct msg_rsp {
 	struct mbox_msghdr hdr;
@@ -299,7 +302,7 @@ struct ready_msg_rsp {
 
 /* Structure for requesting resource provisioning.
  * 'modify' flag to be used when either requesting more
- * or to detach partial of a cetain resource type.
+ * or to detach partial of a certain resource type.
  * Rest of the fields specify how many of what type to
  * be attached.
  * To request LFs from two blocks of same type this mailbox
@@ -489,7 +492,7 @@ struct cgx_set_link_mode_rsp {
 };
 
 #define RVU_LMAC_FEAT_FC		BIT_ULL(0) /* pause frames */
-#define RVU_LMAC_FEAT_PTP		BIT_ULL(1) /* precison time protocol */
+#define RVU_LMAC_FEAT_PTP		BIT_ULL(1) /* precision time protocol */
 #define RVU_MAC_VERSION			BIT_ULL(2)
 #define RVU_MAC_CGX			BIT_ULL(3)
 #define RVU_MAC_RPM			BIT_ULL(4)
@@ -605,6 +608,7 @@ enum nix_af_status {
 	NIX_AF_INVAL_SSO_PF_FUNC    = -420,
 	NIX_AF_ERR_TX_VTAG_NOSPC    = -421,
 	NIX_AF_ERR_RX_VTAG_INUSE    = -422,
+	NIX_AF_ERR_NPC_KEY_NOT_SUPP = -423,
 };
 
 /* For NIX RX vtag action  */
@@ -1141,6 +1145,7 @@ struct npc_install_flow_req {
 	u64 features;
 	u16 entry;
 	u16 channel;
+	u16 chan_mask;
 	u8 intf;
 	u8 set_cntr; /* If counter is available set counter for this entry ? */
 	u8 default_rule;
@@ -1193,6 +1198,17 @@ struct npc_mcam_read_base_rule_rsp {
 	struct mcam_entry entry;
 };
 
+struct npc_mcam_get_stats_req {
+	struct mbox_msghdr hdr;
+	u16 entry; /* mcam entry */
+};
+
+struct npc_mcam_get_stats_rsp {
+	struct mbox_msghdr hdr;
+	u64 stat;  /* counter stats */
+	u8 stat_ena; /* enabled */
+};
+
 enum ptp_op {
 	PTP_OP_ADJFINE = 0,
 	PTP_OP_GET_CLOCK = 1,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 3c640f6aba92..1e012e787260 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -167,6 +167,8 @@ enum key_fields {
 	NPC_IPPROTO_SCTP,
 	NPC_IPPROTO_AH,
 	NPC_IPPROTO_ESP,
+	NPC_IPPROTO_ICMP,
+	NPC_IPPROTO_ICMP6,
 	NPC_SPORT_TCP,
 	NPC_DPORT_TCP,
 	NPC_SPORT_UDP,
@@ -420,6 +422,11 @@ struct nix_tx_action {
 #define TX_VTAG1_LID_MASK		GENMASK_ULL(42, 40)
 #define TX_VTAG1_RELPTR_MASK		GENMASK_ULL(39, 32)
 
+/* NPC MCAM reserved entry index per nixlf */
+#define NIXLF_UCAST_ENTRY	0
+#define NIXLF_BCAST_ENTRY	1
+#define NIXLF_PROMISC_ENTRY	2
+
 struct npc_mcam_kex {
 	/* MKEX Profle Header */
 	u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 76f399229ddb..c2cc4806d13c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -548,6 +548,12 @@ static inline int is_afvf(u16 pcifunc)
 	return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
 }
 
+/* check if PF_FUNC is AF */
+static inline bool is_pffunc_af(u16 pcifunc)
+{
+	return !pcifunc;
+}
+
 static inline bool is_rvu_fwdata_valid(struct rvu *rvu)
 {
 	return (rvu->fwdata->header_magic == RVU_FWDATA_HEADER_MAGIC) &&
@@ -640,7 +646,8 @@ int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en);
 void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 				 int nixlf, u64 chan, u8 *mac_addr);
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, bool allmulti);
+				   int nixlf, u64 chan, u8 chan_cnt,
+				   bool allmulti);
 void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
@@ -665,9 +672,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena);
 int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel);
 int npc_flow_steering_init(struct rvu *rvu, int blkaddr);
 const char *npc_get_field_name(u8 hdr);
-bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf,
-				u16 pcifunc, u8 intf, struct mcam_entry *entry,
-				int *entry_index);
 int npc_get_bank(struct npc_mcam *mcam, int index);
 void npc_mcam_enable_flows(struct rvu *rvu, u16 target);
 void npc_mcam_disable_flows(struct rvu *rvu, u16 target);
@@ -680,6 +684,11 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
 u32  rvu_cgx_get_fifolen(struct rvu *rvu);
 void *rvu_first_cgx_pdata(struct rvu *rvu);
 
+int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
+			     int type);
+bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr,
+			   int index);
+
 /* CPT APIs */
 int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index de3968d2e5ce..9bf8eaabf9ab 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2017,7 +2017,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s,
 			seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype));
 			break;
 		case NPC_OUTER_VID:
-			seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci));
+			seq_printf(s, "0x%x ", ntohs(rule->packet.vlan_tci));
 			seq_printf(s, "mask 0x%x\n",
 				   ntohs(rule->mask.vlan_tci));
 			break;
@@ -2160,7 +2160,7 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused)
 		seq_printf(s, "\tmcam entry: %d\n", iter->entry);
 
 		rvu_dbg_npc_mcam_show_flows(s, iter);
-		if (iter->intf == NIX_INTF_RX) {
+		if (is_npc_intf_rx(iter->intf)) {
 			target = iter->rx_action.pf_func;
 			pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
 			seq_printf(s, "\tForward to: PF%d ", pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 3d068b7d46bd..0a8bd667cb11 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -273,7 +273,8 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
 		pfvf->rx_chan_cnt = 1;
 		pfvf->tx_chan_cnt = 1;
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
-					      pfvf->rx_chan_base, false);
+					      pfvf->rx_chan_base,
+					      pfvf->rx_chan_cnt, false);
 		break;
 	}
 
@@ -3088,7 +3089,8 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
 		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
 	else
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
-					      pfvf->rx_chan_base, allmulti);
+					      pfvf->rx_chan_base,
+					      pfvf->rx_chan_cnt, allmulti);
 	return 0;
 }
 
@@ -3635,9 +3637,7 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
 	if (err)
 		return err;
 
-	rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
-
-	npc_mcam_disable_flows(rvu, pcifunc);
+	rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
 
 	return rvu_cgx_start_stop_io(rvu, pcifunc, false);
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 0bd49c7080a6..0bc4529691ec 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -22,10 +22,6 @@
 #define RSVD_MCAM_ENTRIES_PER_PF	2 /* Bcast & Promisc */
 #define RSVD_MCAM_ENTRIES_PER_NIXLF	1 /* Ucast for LFs */
 
-#define NIXLF_UCAST_ENTRY	0
-#define NIXLF_BCAST_ENTRY	1
-#define NIXLF_PROMISC_ENTRY	2
-
 #define NPC_PARSE_RESULT_DMAC_OFFSET	8
 #define NPC_HW_TSTAMP_OFFSET		8
 #define NPC_KEX_CHAN_MASK		0xFFFULL
@@ -96,6 +92,10 @@ int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel)
 	if (is_npc_intf_tx(intf))
 		return 0;
 
+	/* return in case of AF installed rules */
+	if (is_pffunc_af(pcifunc))
+		return 0;
+
 	if (is_afvf(pcifunc)) {
 		end = rvu_get_num_lbk_chans();
 		if (end < 0)
@@ -196,8 +196,8 @@ static int npc_get_ucast_mcam_index(struct npc_mcam *mcam, u16 pcifunc,
 	return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF;
 }
 
-static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
-				    u16 pcifunc, int nixlf, int type)
+int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
+			     u16 pcifunc, int nixlf, int type)
 {
 	int pf = rvu_get_pf(pcifunc);
 	int index;
@@ -230,8 +230,8 @@ int npc_get_bank(struct npc_mcam *mcam, int index)
 	return bank;
 }
 
-static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam,
-				  int blkaddr, int index)
+bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam,
+			   int blkaddr, int index)
 {
 	int bank = npc_get_bank(mcam, index);
 	u64 cfg;
@@ -647,13 +647,17 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 }
 
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, bool allmulti)
+				   int nixlf, u64 chan, u8 chan_cnt,
+				   bool allmulti)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct npc_install_flow_req req = { 0 };
+	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	int blkaddr, ucast_idx, index, kwi;
-	struct mcam_entry entry = { {0} };
-	struct nix_rx_action action = { };
+	int blkaddr, ucast_idx, index;
+	u8 mac_addr[ETH_ALEN] = { 0 };
+	struct nix_rx_action action;
+	u64 relaxed_mask;
 
 	/* Only PF or AF VF can add a promiscuous entry */
 	if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc))
@@ -663,24 +667,15 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 	if (blkaddr < 0)
 		return;
 
+	*(u64 *)&action = 0x00;
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_PROMISC_ENTRY);
 
-	entry.kw[0] = chan;
-	entry.kw_mask[0] = 0xFFFULL;
-
-	if (allmulti) {
-		kwi = NPC_KEXOF_DMAC / sizeof(u64);
-		entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */
-		entry.kw_mask[kwi] = BIT_ULL(40);
-	}
-
-	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
-					     nixlf, NIXLF_UCAST_ENTRY);
-
 	/* If the corresponding PF's ucast action is RSS,
 	 * use the same action for promisc also
 	 */
+	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					     nixlf, NIXLF_UCAST_ENTRY);
 	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
 		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
 							blkaddr, ucast_idx);
@@ -691,9 +686,36 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 		action.pf_func = pcifunc;
 	}
 
-	entry.action = *(u64 *)&action;
-	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
-			      pfvf->nix_rx_intf, &entry, true);
+	if (allmulti) {
+		mac_addr[0] = 0x01;	/* LSB bit of 1st byte in DMAC */
+		ether_addr_copy(req.packet.dmac, mac_addr);
+		ether_addr_copy(req.mask.dmac, mac_addr);
+		req.features = BIT_ULL(NPC_DMAC);
+	}
+
+	req.chan_mask = 0xFFFU;
+	if (chan_cnt > 1) {
+		if (!is_power_of_2(chan_cnt)) {
+			dev_err(rvu->dev,
+				"%s: channel count more than 1, must be power of 2\n", __func__);
+			return;
+		}
+		relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1,
+					   ilog2(chan_cnt));
+		req.chan_mask &= relaxed_mask;
+	}
+
+	req.channel = chan;
+	req.intf = pfvf->nix_rx_intf;
+	req.entry = index;
+	req.op = action.op;
+	req.hdr.pcifunc = 0; /* AF is requester */
+	req.vf = pcifunc;
+	req.index = action.index;
+	req.match_id = action.match_id;
+	req.flow_key_alg = action.flow_key_alg;
+
+	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
 static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc,
@@ -728,12 +750,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan)
 {
+	struct rvu_pfvf *pfvf;
+	struct npc_install_flow_req req = { 0 };
+	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	struct mcam_entry entry = { {0} };
 	struct rvu_hwinfo *hw = rvu->hw;
-	struct nix_rx_action action;
-	struct rvu_pfvf *pfvf;
 	int blkaddr, index;
+	u32 req_index = 0;
+	u8 op;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
 	if (blkaddr < 0)
@@ -755,32 +779,29 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_BCAST_ENTRY);
 
-	/* Match ingress channel */
-	entry.kw[0] = chan;
-	entry.kw_mask[0] = 0xfffull;
-
-	/* Match broadcast MAC address.
-	 * DMAC is extracted at 0th bit of PARSE_KEX::KW1
-	 */
-	entry.kw[1] = 0xffffffffffffull;
-	entry.kw_mask[1] = 0xffffffffffffull;
-
-	*(u64 *)&action = 0x00;
 	if (!hw->cap.nix_rx_multicast) {
 		/* Early silicon doesn't support pkt replication,
 		 * so install entry with UCAST action, so that PF
 		 * receives all broadcast packets.
 		 */
-		action.op = NIX_RX_ACTIONOP_UCAST;
-		action.pf_func = pcifunc;
+		op = NIX_RX_ACTIONOP_UCAST;
 	} else {
-		action.index = pfvf->bcast_mce_idx;
-		action.op = NIX_RX_ACTIONOP_MCAST;
+		op = NIX_RX_ACTIONOP_MCAST;
+		req_index = pfvf->bcast_mce_idx;
 	}
 
-	entry.action = *(u64 *)&action;
-	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
-			      pfvf->nix_rx_intf, &entry, true);
+	eth_broadcast_addr((u8 *)&req.packet.dmac);
+	eth_broadcast_addr((u8 *)&req.mask.dmac);
+	req.features = BIT_ULL(NPC_DMAC);
+	req.channel = chan;
+	req.intf = pfvf->nix_rx_intf;
+	req.entry = index;
+	req.op = op;
+	req.hdr.pcifunc = 0; /* AF is requester */
+	req.vf = pcifunc;
+	req.index = req_index;
+
+	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
 void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
@@ -967,7 +988,7 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	struct rvu_npc_mcam_rule *rule;
+	struct rvu_npc_mcam_rule *rule, *tmp;
 	int blkaddr;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -977,15 +998,18 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 	mutex_lock(&mcam->lock);
 
 	/* Disable MCAM entries directing traffic to this 'pcifunc' */
-	list_for_each_entry(rule, &mcam->mcam_rules, list) {
+	list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) {
 		if (is_npc_intf_rx(rule->intf) &&
 		    rule->rx_action.pf_func == pcifunc) {
 			npc_enable_mcam_entry(rvu, mcam, blkaddr,
 					      rule->entry, false);
 			rule->enable = false;
 			/* Indicate that default rule is disabled */
-			if (rule->default_rule)
+			if (rule->default_rule) {
 				pfvf->def_ucast_rule = NULL;
+				list_del(&rule->list);
+				kfree(rule);
+			}
 		}
 	}
 
@@ -1674,6 +1698,9 @@ void rvu_npc_get_mcam_counter_alloc_info(struct rvu *rvu, u16 pcifunc,
 static int npc_mcam_verify_entry(struct npc_mcam *mcam,
 				 u16 pcifunc, int entry)
 {
+	/* verify AF installed entries */
+	if (is_pffunc_af(pcifunc))
+		return 0;
 	/* Verify if entry is valid and if it is indeed
 	 * allocated to the requesting PFFUNC.
 	 */
@@ -2268,6 +2295,10 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
 		goto exit;
 	}
 
+	/* For AF installed rules, the nix_intf should be set to target NIX */
+	if (is_pffunc_af(req->hdr.pcifunc))
+		nix_intf = req->intf;
+
 	npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf,
 			      &req->entry_data, req->enable_entry);
 
@@ -2730,30 +2761,6 @@ int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req,
 	return 0;
 }
 
-bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf,
-				u16 pcifunc, u8 intf, struct mcam_entry *entry,
-				int *index)
-{
-	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
-	struct npc_mcam *mcam = &rvu->hw->mcam;
-	bool enable;
-	u8 nix_intf;
-
-	if (is_npc_intf_tx(intf))
-		nix_intf = pfvf->nix_tx_intf;
-	else
-		nix_intf = pfvf->nix_rx_intf;
-
-	*index = npc_get_nixlf_mcam_index(mcam, pcifunc,
-					  nixlf, NIXLF_UCAST_ENTRY);
-	/* dont force enable unicast entry  */
-	enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index);
-	npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf,
-			      entry, enable);
-
-	return enable;
-}
-
 int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu,
 					      struct msg_req *req,
 					      struct npc_mcam_read_base_rule_rsp *rsp)
@@ -2799,3 +2806,42 @@ read_entry:
 out:
 	return rc;
 }
+
+int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu,
+					  struct npc_mcam_get_stats_req *req,
+					  struct npc_mcam_get_stats_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 index, cntr;
+	int blkaddr;
+	u64 regval;
+	u32 bank;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+
+	index = req->entry & (mcam->banksize - 1);
+	bank = npc_get_bank(mcam, req->entry);
+
+	/* read MCAM entry STAT_ACT register */
+	regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank));
+
+	if (!(regval & BIT_ULL(9))) {
+		rsp->stat_ena = 0;
+		mutex_unlock(&mcam->lock);
+		return 0;
+	}
+
+	cntr = regval & 0x1FF;
+
+	rsp->stat_ena = 1;
+	rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(cntr));
+	rsp->stat &= BIT_ULL(48) - 1;
+
+	mutex_unlock(&mcam->lock);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 4ba9d54ce4e3..7f35b62eea13 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -29,6 +29,8 @@ static const char * const npc_flow_names[] = {
 	[NPC_IPPROTO_TCP] = "ip proto tcp",
 	[NPC_IPPROTO_UDP] = "ip proto udp",
 	[NPC_IPPROTO_SCTP] = "ip proto sctp",
+	[NPC_IPPROTO_ICMP] = "ip proto icmp",
+	[NPC_IPPROTO_ICMP6] = "ip proto icmp6",
 	[NPC_IPPROTO_AH] = "ip proto AH",
 	[NPC_IPPROTO_ESP] = "ip proto ESP",
 	[NPC_SPORT_TCP]	= "tcp source port",
@@ -427,6 +429,7 @@ do {									       \
 	 * packet header fields below.
 	 * Example: Source IP is 4 bytes and starts at 12th byte of IP header
 	 */
+	NPC_SCAN_HDR(NPC_TOS, NPC_LID_LC, NPC_LT_LC_IP, 1, 1);
 	NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4);
 	NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4);
 	NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16);
@@ -477,9 +480,12 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
 				     BIT_ULL(NPC_IPPROTO_SCTP);
 	}
 
-	/* for AH, check if corresponding layer type is present in the key */
-	if (npc_check_field(rvu, blkaddr, NPC_LD, intf))
+	/* for AH/ICMP/ICMPv6/, check if corresponding layer type is present in the key */
+	if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) {
 		*features |= BIT_ULL(NPC_IPPROTO_AH);
+		*features |= BIT_ULL(NPC_IPPROTO_ICMP);
+		*features |= BIT_ULL(NPC_IPPROTO_ICMP6);
+	}
 
 	/* for ESP, check if corresponding layer type is present in the key */
 	if (npc_check_field(rvu, blkaddr, NPC_LE, intf))
@@ -597,7 +603,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf)
 		dev_info(rvu->dev, "Unsupported flow(s):\n");
 		for_each_set_bit(bit, (unsigned long *)&unsupported, 64)
 			dev_info(rvu->dev, "%s ", npc_get_field_name(bit));
-		return -EOPNOTSUPP;
+		return NIX_AF_ERR_NPC_KEY_NOT_SUPP;
 	}
 
 	return 0;
@@ -769,6 +775,12 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
 	if (features & BIT_ULL(NPC_IPPROTO_SCTP))
 		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP,
 				 0, ~0ULL, 0, intf);
+	if (features & BIT_ULL(NPC_IPPROTO_ICMP))
+		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP,
+				 0, ~0ULL, 0, intf);
+	if (features & BIT_ULL(NPC_IPPROTO_ICMP6))
+		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6,
+				 0, ~0ULL, 0, intf);
 
 	if (features & BIT_ULL(NPC_OUTER_VID))
 		npc_update_entry(rvu, NPC_LB, entry,
@@ -798,6 +810,7 @@ do {									      \
 	NPC_WRITE_FLOW(NPC_SMAC, smac, smac_val, 0, smac_mask, 0);
 	NPC_WRITE_FLOW(NPC_ETYPE, etype, ntohs(pkt->etype), 0,
 		       ntohs(mask->etype), 0);
+	NPC_WRITE_FLOW(NPC_TOS, tos, pkt->tos, 0, mask->tos, 0);
 	NPC_WRITE_FLOW(NPC_SIP_IPV4, ip4src, ntohl(pkt->ip4src), 0,
 		       ntohl(mask->ip4src), 0);
 	NPC_WRITE_FLOW(NPC_DIP_IPV4, ip4dst, ntohl(pkt->ip4dst), 0,
@@ -903,9 +916,11 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf,
 				struct npc_install_flow_req *req, u16 target)
 {
 	struct nix_rx_action action;
+	u64 chan_mask;
 
-	npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0,
-			 ~0ULL, 0, NIX_INTF_RX);
+	chan_mask = req->chan_mask ? req->chan_mask : ~0ULL;
+	npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0,
+			 NIX_INTF_RX);
 
 	*(u64 *)&action = 0x00;
 	action.pf_func = target;
@@ -998,33 +1013,21 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
 	if (is_npc_intf_tx(req->intf))
 		goto find_rule;
 
-	if (def_ucast_rule)
+	if (req->default_rule) {
+		entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf,
+						       NIXLF_UCAST_ENTRY);
+		enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index);
+	}
+
+	/* update mcam entry with default unicast rule attributes */
+	if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) {
 		missing_features = (def_ucast_rule->features ^ features) &
 					def_ucast_rule->features;
-
-	if (req->default_rule && req->append) {
-		/* add to default rule */
 		if (missing_features)
 			npc_update_flow(rvu, entry, missing_features,
 					&def_ucast_rule->packet,
 					&def_ucast_rule->mask,
 					&dummy, req->intf);
-		enable = rvu_npc_write_default_rule(rvu, blkaddr,
-						    nixlf, target,
-						    pfvf->nix_rx_intf, entry,
-						    &entry_index);
-		installed_features = req->features | missing_features;
-	} else if (req->default_rule && !req->append) {
-		/* overwrite default rule */
-		enable = rvu_npc_write_default_rule(rvu, blkaddr,
-						    nixlf, target,
-						    pfvf->nix_rx_intf, entry,
-						    &entry_index);
-	} else if (msg_from_vf) {
-		/* normal rule - include default rule also to it for VF */
-		npc_update_flow(rvu, entry, missing_features,
-				&def_ucast_rule->packet, &def_ucast_rule->mask,
-				&dummy, req->intf);
 		installed_features = req->features | missing_features;
 	}
 
@@ -1036,12 +1039,9 @@ find_rule:
 			return -ENOMEM;
 		new = true;
 	}
-	/* no counter for default rule */
-	if (req->default_rule)
-		goto update_rule;
 
 	/* allocate new counter if rule has no counter */
-	if (req->set_cntr && !rule->has_cntr)
+	if (!req->default_rule && req->set_cntr && !rule->has_cntr)
 		rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp);
 
 	/* if user wants to delete an existing counter for a rule then
@@ -1051,7 +1051,14 @@ find_rule:
 		rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
 
 	write_req.hdr.pcifunc = owner;
-	write_req.entry = req->entry;
+
+	/* AF owns the default rules so change the owner just to relax
+	 * the checks in rvu_mbox_handler_npc_mcam_write_entry
+	 */
+	if (req->default_rule)
+		write_req.hdr.pcifunc = 0;
+
+	write_req.entry = entry_index;
 	write_req.intf = req->intf;
 	write_req.enable_entry = (u8)enable;
 	/* if counter is available then clear and use it */
@@ -1069,7 +1076,7 @@ find_rule:
 			kfree(rule);
 		return err;
 	}
-update_rule:
+	/* update rule */
 	memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
 	memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
 	rule->entry = entry_index;
@@ -1145,8 +1152,13 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	else
 		target = req->hdr.pcifunc;
 
-	if (npc_check_unsupported_flows(rvu, req->features, req->intf))
-		return -EOPNOTSUPP;
+	/* ignore chan_mask in case pf func is not AF, revisit later */
+	if (!is_pffunc_af(req->hdr.pcifunc))
+		req->chan_mask = 0xFFF;
+
+	err = npc_check_unsupported_flows(rvu, req->features, req->intf);
+	if (err)
+		return err;
 
 	if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
 		return -EINVAL;
@@ -1278,6 +1290,7 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr,
 
 	write_req.hdr.pcifunc = rule->owner;
 	write_req.entry = rule->entry;
+	write_req.intf = pfvf->nix_rx_intf;
 
 	mutex_unlock(&mcam->lock);
 	err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, &rsp);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 745aa8a19499..457c94793e63 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
 obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o
 
 rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
-		     otx2_ptp.o otx2_flows.o cn10k.o
+		     otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o
 rvu_nicvf-y := otx2_vf.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index a518c2283f18..45730d0d92f2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -18,6 +18,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/soc/marvell/octeontx2/asm.h>
+#include <net/pkt_cls.h>
 
 #include <mbox.h>
 #include <npc.h>
@@ -264,6 +265,7 @@ struct otx2_flow_config {
 #define OTX2_MAX_NTUPLE_FLOWS	32
 #define OTX2_MAX_UNICAST_FLOWS	8
 #define OTX2_MAX_VLAN_FLOWS	1
+#define OTX2_MAX_TC_FLOWS	OTX2_MAX_NTUPLE_FLOWS
 #define OTX2_MCAM_COUNT		(OTX2_MAX_NTUPLE_FLOWS + \
 				 OTX2_MAX_UNICAST_FLOWS + \
 				 OTX2_MAX_VLAN_FLOWS)
@@ -274,10 +276,20 @@ struct otx2_flow_config {
 #define OTX2_PER_VF_VLAN_FLOWS	2 /* rx+tx per VF */
 #define OTX2_VF_VLAN_RX_INDEX	0
 #define OTX2_VF_VLAN_TX_INDEX	1
+	u32			tc_flower_offset;
 	u32                     ntuple_max_flows;
+	u32			tc_max_flows;
 	struct list_head	flow_list;
 };
 
+struct otx2_tc_info {
+	/* hash table to store TC offloaded flows */
+	struct rhashtable		flow_table;
+	struct rhashtable_params	flow_ht_params;
+	DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS);
+	unsigned long			num_entries;
+};
+
 struct dev_hw_ops {
 	int	(*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura);
 	void	(*sqe_flush)(void *dev, struct otx2_snd_queue *sq,
@@ -305,6 +317,8 @@ struct otx2_nic {
 #define OTX2_FLAG_PF_SHUTDOWN			BIT_ULL(8)
 #define OTX2_FLAG_RX_PAUSE_ENABLED		BIT_ULL(9)
 #define OTX2_FLAG_TX_PAUSE_ENABLED		BIT_ULL(10)
+#define OTX2_FLAG_TC_FLOWER_SUPPORT		BIT_ULL(11)
+#define OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED	BIT_ULL(12)
 	u64			flags;
 
 	struct otx2_qset	qset;
@@ -347,6 +361,7 @@ struct otx2_nic {
 	struct hwtstamp_config	tstamp;
 
 	struct otx2_flow_config	*flow_cfg;
+	struct otx2_tc_info	tc_info;
 };
 
 static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -802,4 +817,9 @@ int otx2_add_macfilter(struct net_device *netdev, const u8 *mac);
 int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable);
 int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf);
 u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
+/* tc support */
+int otx2_init_tc(struct otx2_nic *nic);
+void otx2_shutdown_tc(struct otx2_nic *nic);
+int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		  void *type_data);
 #endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index dc1778420978..0b4fa92ba821 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -57,10 +57,13 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 		flow_cfg->ntuple_max_flows = rsp->count;
 		flow_cfg->ntuple_offset = 0;
 		pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
+		flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows;
+		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
 	} else {
 		flow_cfg->vf_vlan_offset = 0;
 		flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset +
 						vf_vlan_max_flows;
+		flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset;
 		flow_cfg->unicast_offset = flow_cfg->ntuple_offset +
 						OTX2_MAX_NTUPLE_FLOWS;
 		flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset +
@@ -69,6 +72,7 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 		pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT;
 		pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT;
 		pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT;
+		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
 	}
 
 	for (i = 0; i < rsp->count; i++)
@@ -93,6 +97,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
 	INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
 
 	pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS;
+	pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows;
 
 	err = otx2_alloc_mcam_entries(pf);
 	if (err)
@@ -303,6 +308,35 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ipv4_usr_mask->tos) {
+			pkt->tos = ipv4_usr_hdr->tos;
+			pmask->tos = ipv4_usr_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
+		if (ipv4_usr_mask->proto) {
+			switch (ipv4_usr_hdr->proto) {
+			case IPPROTO_ICMP:
+				req->features |= BIT_ULL(NPC_IPPROTO_ICMP);
+				break;
+			case IPPROTO_TCP:
+				req->features |= BIT_ULL(NPC_IPPROTO_TCP);
+				break;
+			case IPPROTO_UDP:
+				req->features |= BIT_ULL(NPC_IPPROTO_UDP);
+				break;
+			case IPPROTO_SCTP:
+				req->features |= BIT_ULL(NPC_IPPROTO_SCTP);
+				break;
+			case IPPROTO_AH:
+				req->features |= BIT_ULL(NPC_IPPROTO_AH);
+				break;
+			case IPPROTO_ESP:
+				req->features |= BIT_ULL(NPC_IPPROTO_ESP);
+				break;
+			default:
+				return -EOPNOTSUPP;
+			}
+		}
 		pkt->etype = cpu_to_be16(ETH_P_IP);
 		pmask->etype = cpu_to_be16(0xFFFF);
 		req->features |= BIT_ULL(NPC_ETYPE);
@@ -327,6 +361,11 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ipv4_l4_mask->tos) {
+			pkt->tos = ipv4_l4_hdr->tos;
+			pmask->tos = ipv4_l4_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
 		if (ipv4_l4_mask->psrc) {
 			memcpy(&pkt->sport, &ipv4_l4_hdr->psrc,
 			       sizeof(pkt->sport));
@@ -377,10 +416,14 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ah_esp_mask->tos) {
+			pkt->tos = ah_esp_hdr->tos;
+			pmask->tos = ah_esp_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
 
 		/* NPC profile doesn't extract AH/ESP header fields */
-		if ((ah_esp_mask->spi & ah_esp_hdr->spi) ||
-		    (ah_esp_mask->tos & ah_esp_mask->tos))
+		if (ah_esp_mask->spi & ah_esp_hdr->spi)
 			return -EOPNOTSUPP;
 
 		if (flow_type == AH_V4_FLOW)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 2fd3d235d292..03004fdac0c6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1765,6 +1765,24 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
+static netdev_features_t otx2_fix_features(struct net_device *dev,
+					   netdev_features_t features)
+{
+	/* check if n-tuple filters are ON */
+	if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) {
+		netdev_info(dev, "Disabling n-tuple filters\n");
+		features &= ~NETIF_F_NTUPLE;
+	}
+
+	/* check if tc hw offload is ON */
+	if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) {
+		netdev_info(dev, "Disabling TC hardware offload\n");
+		features &= ~NETIF_F_HW_TC;
+	}
+
+	return features;
+}
+
 static void otx2_set_rx_mode(struct net_device *netdev)
 {
 	struct otx2_nic *pf = netdev_priv(netdev);
@@ -1827,6 +1845,12 @@ static int otx2_set_features(struct net_device *netdev,
 	if ((changed & NETIF_F_NTUPLE) && !ntuple)
 		otx2_destroy_ntuple_flows(pf);
 
+	if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
+	    pf->tc_info.num_entries) {
+		netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
@@ -2225,6 +2249,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_open		= otx2_open,
 	.ndo_stop		= otx2_stop,
 	.ndo_start_xmit		= otx2_xmit,
+	.ndo_fix_features	= otx2_fix_features,
 	.ndo_set_mac_address    = otx2_set_mac_address,
 	.ndo_change_mtu		= otx2_change_mtu,
 	.ndo_set_rx_mode	= otx2_set_rx_mode,
@@ -2235,6 +2260,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_set_vf_mac		= otx2_set_vf_mac,
 	.ndo_set_vf_vlan	= otx2_set_vf_vlan,
 	.ndo_get_vf_config	= otx2_get_vf_config,
+	.ndo_setup_tc		= otx2_setup_tc,
 };
 
 static int otx2_wq_init(struct otx2_nic *pf)
@@ -2454,6 +2480,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 				       NETIF_F_HW_VLAN_STAG_RX;
 	netdev->features |= netdev->hw_features;
 
+	/* HW supports tc offload but mutually exclusive with n-tuple filters */
+	if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
 	netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
 
@@ -2475,6 +2505,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	otx2_set_ethtool_ops(netdev);
 
+	err = otx2_init_tc(pf);
+	if (err)
+		goto err_mcam_flow_del;
+
 	/* Enable link notifications */
 	otx2_cgx_config_linkevents(pf, true);
 
@@ -2484,6 +2518,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+err_mcam_flow_del:
+	otx2_mcam_flow_del(pf);
 err_unreg_netdev:
 	unregister_netdev(netdev);
 err_del_mcam_entries:
@@ -2651,6 +2687,7 @@ static void otx2_remove(struct pci_dev *pdev)
 
 	otx2_ptp_destroy(pf);
 	otx2_mcam_flow_del(pf);
+	otx2_shutdown_tc(pf);
 	otx2_detach_resources(&pf->mbox);
 	if (pf->hw.lmt_base)
 		iounmap(pf->hw.lmt_base);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
index 21b811c6ee0f..f4fd72ee9a25 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
@@ -152,6 +152,7 @@
 #define NIX_AF_TL3X_SCHEDULE(a)		(0x1000 | (a) << 16)
 #define NIX_AF_TL4X_PARENT(a)		(0x1288 | (a) << 16)
 #define NIX_AF_TL4X_SCHEDULE(a)		(0x1200 | (a) << 16)
+#define NIX_AF_TL4X_PIR(a)		(0x1230 | (a) << 16)
 #define NIX_AF_MDQX_SCHEDULE(a)		(0x1400 | (a) << 16)
 #define NIX_AF_MDQX_PARENT(a)		(0x1480 | (a) << 16)
 #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b)	(0x1700 | (a) << 16 | (b) << 3)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
new file mode 100644
index 000000000000..51157b283f6f
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -0,0 +1,787 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/bitfield.h>
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/ipv6.h>
+
+#include "otx2_common.h"
+
+/* Egress rate limiting definitions */
+#define MAX_BURST_EXPONENT		0x0FULL
+#define MAX_BURST_MANTISSA		0xFFULL
+#define MAX_BURST_SIZE			130816ULL
+#define MAX_RATE_DIVIDER_EXPONENT	12ULL
+#define MAX_RATE_EXPONENT		0x0FULL
+#define MAX_RATE_MANTISSA		0xFFULL
+
+/* Bitfields in NIX_TLX_PIR register */
+#define TLX_RATE_MANTISSA		GENMASK_ULL(8, 1)
+#define TLX_RATE_EXPONENT		GENMASK_ULL(12, 9)
+#define TLX_RATE_DIVIDER_EXPONENT	GENMASK_ULL(16, 13)
+#define TLX_BURST_MANTISSA		GENMASK_ULL(36, 29)
+#define TLX_BURST_EXPONENT		GENMASK_ULL(40, 37)
+
+struct otx2_tc_flow_stats {
+	u64 bytes;
+	u64 pkts;
+	u64 used;
+};
+
+struct otx2_tc_flow {
+	struct rhash_head		node;
+	unsigned long			cookie;
+	u16				entry;
+	unsigned int			bitpos;
+	struct rcu_head			rcu;
+	struct otx2_tc_flow_stats	stats;
+	spinlock_t			lock; /* lock for stats */
+};
+
+static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+				      u32 *burst_mantissa)
+{
+	unsigned int tmp;
+
+	/* Burst is calculated as
+	 * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+	 * Max supported burst size is 130,816 bytes.
+	 */
+	burst = min_t(u32, burst, MAX_BURST_SIZE);
+	if (burst) {
+		*burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+		tmp = burst - rounddown_pow_of_two(burst);
+		if (burst < MAX_BURST_MANTISSA)
+			*burst_mantissa = tmp * 2;
+		else
+			*burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+	} else {
+		*burst_exp = MAX_BURST_EXPONENT;
+		*burst_mantissa = MAX_BURST_MANTISSA;
+	}
+}
+
+static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+				     u32 *mantissa, u32 *div_exp)
+{
+	unsigned int tmp;
+
+	/* Rate calculation by hardware
+	 *
+	 * PIR_ADD = ((256 + mantissa) << exp) / 256
+	 * rate = (2 * PIR_ADD) / ( 1 << div_exp)
+	 * The resultant rate is in Mbps.
+	 */
+
+	/* 2Mbps to 100Gbps can be expressed with div_exp = 0.
+	 * Setting this to '0' will ease the calculation of
+	 * exponent and mantissa.
+	 */
+	*div_exp = 0;
+
+	if (maxrate) {
+		*exp = ilog2(maxrate) ? ilog2(maxrate) - 1 : 0;
+		tmp = maxrate - rounddown_pow_of_two(maxrate);
+		if (maxrate < MAX_RATE_MANTISSA)
+			*mantissa = tmp * 2;
+		else
+			*mantissa = tmp / (1ULL << (*exp - 7));
+	} else {
+		/* Instead of disabling rate limiting, set all values to max */
+		*exp = MAX_RATE_EXPONENT;
+		*mantissa = MAX_RATE_MANTISSA;
+	}
+}
+
+static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
+{
+	struct otx2_hw *hw = &nic->hw;
+	struct nix_txschq_config *req;
+	u32 burst_exp, burst_mantissa;
+	u32 exp, mantissa, div_exp;
+	int txschq, err;
+
+	/* All SQs share the same TL4, so pick the first scheduler */
+	txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+
+	/* Get exponent and mantissa values from the desired rate */
+	otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+	otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->lvl = NIX_TXSCH_LVL_TL4;
+	req->num_regs = 1;
+	req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+	req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+			 FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+			 FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+			 FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+			 FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	mutex_unlock(&nic->mbox.lock);
+	return err;
+}
+
+static int otx2_tc_validate_flow(struct otx2_nic *nic,
+				 struct flow_action *actions,
+				 struct netlink_ext_ack *extack)
+{
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
+	if (!flow_action_has_entries(actions)) {
+		NL_SET_ERR_MSG_MOD(extack, "MATCHALL offload called with no action");
+		return -EINVAL;
+	}
+
+	if (!flow_offload_has_one_action(actions)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Egress MATCHALL offload supports only 1 policing action");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+					   struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct flow_action *actions = &cls->rule->action;
+	struct flow_action_entry *entry;
+	u32 rate;
+	int err;
+
+	err = otx2_tc_validate_flow(nic, actions, extack);
+	if (err)
+		return err;
+
+	if (nic->flags & OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only one Egress MATCHALL ratelimiter can be offloaded");
+		return -ENOMEM;
+	}
+
+	entry = &cls->rule->action.entries[0];
+	switch (entry->id) {
+	case FLOW_ACTION_POLICE:
+		if (entry->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+			return -EOPNOTSUPP;
+		}
+		/* Convert bytes per second to Mbps */
+		rate = entry->police.rate_bytes_ps * 8;
+		rate = max_t(u32, rate / 1000000, 1);
+		err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+		if (err)
+			return err;
+		nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only police action is supported with Egress MATCHALL offload");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
+					  struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	int err;
+
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
+	err = otx2_set_matchall_egress_rate(nic, 0, 0);
+	nic->flags &= ~OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED;
+	return err;
+}
+
+static int otx2_tc_parse_actions(struct otx2_nic *nic,
+				 struct flow_action *flow_action,
+				 struct npc_install_flow_req *req)
+{
+	struct flow_action_entry *act;
+	struct net_device *target;
+	struct otx2_nic *priv;
+	int i;
+
+	if (!flow_action_has_entries(flow_action)) {
+		netdev_info(nic->netdev, "no tc actions specified");
+		return -EINVAL;
+	}
+
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
+			req->op = NIX_RX_ACTIONOP_DROP;
+			return 0;
+		case FLOW_ACTION_ACCEPT:
+			req->op = NIX_RX_ACTION_DEFAULT;
+			return 0;
+		case FLOW_ACTION_REDIRECT_INGRESS:
+			target = act->dev;
+			priv = netdev_priv(target);
+			/* npc_install_flow_req doesn't support passing a target pcifunc */
+			if (rvu_get_pf(nic->pcifunc) != rvu_get_pf(priv->pcifunc)) {
+				netdev_info(nic->netdev,
+					    "can't redirect to other pf/vf\n");
+				return -EOPNOTSUPP;
+			}
+			req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK;
+			req->op = NIX_RX_ACTION_DEFAULT;
+			return 0;
+		case FLOW_ACTION_VLAN_POP:
+			req->vtag0_valid = true;
+			/* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */
+			req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE7;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int otx2_tc_prepare_flow(struct otx2_nic *nic,
+				struct flow_cls_offload *f,
+				struct npc_install_flow_req *req)
+{
+	struct flow_msg *flow_spec = &req->packet;
+	struct flow_msg *flow_mask = &req->mask;
+	struct flow_dissector *dissector;
+	struct flow_rule *rule;
+	u8 ip_proto = 0;
+
+	rule = flow_cls_offload_flow_rule(f);
+	dissector = rule->match.dissector;
+
+	if ((dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_IP))))  {
+		netdev_info(nic->netdev, "unsupported flow used key 0x%x",
+			    dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+
+		/* All EtherTypes can be matched, no hw limitation */
+		flow_spec->etype = match.key->n_proto;
+		flow_mask->etype = match.mask->n_proto;
+		req->features |= BIT_ULL(NPC_ETYPE);
+
+		if (match.mask->ip_proto &&
+		    (match.key->ip_proto != IPPROTO_TCP &&
+		     match.key->ip_proto != IPPROTO_UDP &&
+		     match.key->ip_proto != IPPROTO_SCTP &&
+		     match.key->ip_proto != IPPROTO_ICMP &&
+		     match.key->ip_proto != IPPROTO_ICMPV6)) {
+			netdev_info(nic->netdev,
+				    "ip_proto=0x%x not supported\n",
+				    match.key->ip_proto);
+			return -EOPNOTSUPP;
+		}
+		if (match.mask->ip_proto)
+			ip_proto = match.key->ip_proto;
+
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_IPPROTO_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_IPPROTO_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_IPPROTO_SCTP);
+		else if (ip_proto == IPPROTO_ICMP)
+			req->features |= BIT_ULL(NPC_IPPROTO_ICMP);
+		else if (ip_proto == IPPROTO_ICMPV6)
+			req->features |= BIT_ULL(NPC_IPPROTO_ICMP6);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+		if (!is_zero_ether_addr(match.mask->src)) {
+			netdev_err(nic->netdev, "src mac match not supported\n");
+			return -EOPNOTSUPP;
+		}
+
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			ether_addr_copy(flow_spec->dmac, (u8 *)&match.key->dst);
+			ether_addr_copy(flow_mask->dmac,
+					(u8 *)&match.mask->dst);
+			req->features |= BIT_ULL(NPC_DMAC);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_ip(rule, &match);
+		if ((ntohs(flow_spec->etype) != ETH_P_IP) &&
+		    match.mask->tos) {
+			netdev_err(nic->netdev, "tos not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (match.mask->ttl) {
+			netdev_err(nic->netdev, "ttl not supported\n");
+			return -EOPNOTSUPP;
+		}
+		flow_spec->tos = match.key->tos;
+		flow_mask->tos = match.mask->tos;
+		req->features |= BIT_ULL(NPC_TOS);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+		u16 vlan_tci, vlan_tci_mask;
+
+		flow_rule_match_vlan(rule, &match);
+
+		if (ntohs(match.key->vlan_tpid) != ETH_P_8021Q) {
+			netdev_err(nic->netdev, "vlan tpid 0x%x not supported\n",
+				   ntohs(match.key->vlan_tpid));
+			return -EOPNOTSUPP;
+		}
+
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_dei ||
+		    match.mask->vlan_priority) {
+			vlan_tci = match.key->vlan_id |
+				   match.key->vlan_dei << 12 |
+				   match.key->vlan_priority << 13;
+
+			vlan_tci_mask = match.mask->vlan_id |
+					match.key->vlan_dei << 12 |
+					match.key->vlan_priority << 13;
+
+			flow_spec->vlan_tci = htons(vlan_tci);
+			flow_mask->vlan_tci = htons(vlan_tci_mask);
+			req->features |= BIT_ULL(NPC_OUTER_VID);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+
+		flow_spec->ip4dst = match.key->dst;
+		flow_mask->ip4dst = match.mask->dst;
+		req->features |= BIT_ULL(NPC_DIP_IPV4);
+
+		flow_spec->ip4src = match.key->src;
+		flow_mask->ip4src = match.mask->src;
+		req->features |= BIT_ULL(NPC_SIP_IPV4);
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
+			netdev_err(nic->netdev,
+				   "Flow matching on IPv6 loopback addr is not supported\n");
+			return -EOPNOTSUPP;
+		}
+
+		if (!ipv6_addr_any(&match.mask->dst)) {
+			memcpy(&flow_spec->ip6dst,
+			       (struct in6_addr *)&match.key->dst,
+			       sizeof(flow_spec->ip6dst));
+			memcpy(&flow_mask->ip6dst,
+			       (struct in6_addr *)&match.mask->dst,
+			       sizeof(flow_spec->ip6dst));
+			req->features |= BIT_ULL(NPC_DIP_IPV6);
+		}
+
+		if (!ipv6_addr_any(&match.mask->src)) {
+			memcpy(&flow_spec->ip6src,
+			       (struct in6_addr *)&match.key->src,
+			       sizeof(flow_spec->ip6src));
+			memcpy(&flow_mask->ip6src,
+			       (struct in6_addr *)&match.mask->src,
+			       sizeof(flow_spec->ip6src));
+			req->features |= BIT_ULL(NPC_SIP_IPV6);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+
+		flow_spec->dport = match.key->dst;
+		flow_mask->dport = match.mask->dst;
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_DPORT_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_DPORT_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_DPORT_SCTP);
+
+		flow_spec->sport = match.key->src;
+		flow_mask->sport = match.mask->src;
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_SPORT_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_SPORT_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_SPORT_SCTP);
+	}
+
+	return otx2_tc_parse_actions(nic, &rule->action, req);
+}
+
+static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
+{
+	struct npc_delete_flow_req *req;
+	int err;
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_npc_delete_flow(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->entry = entry;
+
+	/* Send message to AF */
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	if (err) {
+		netdev_err(nic->netdev, "Failed to delete MCAM flow entry %d\n",
+			   entry);
+		mutex_unlock(&nic->mbox.lock);
+		return -EFAULT;
+	}
+	mutex_unlock(&nic->mbox.lock);
+
+	return 0;
+}
+
+static int otx2_tc_del_flow(struct otx2_nic *nic,
+			    struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct otx2_tc_flow *flow_node;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_err(nic->netdev, "tc flow not found for cookie 0x%lx\n",
+			   tc_flow_cmd->cookie);
+		return -EINVAL;
+	}
+
+	otx2_del_mcam_flow_entry(nic, flow_node->entry);
+
+	WARN_ON(rhashtable_remove_fast(&nic->tc_info.flow_table,
+				       &flow_node->node,
+				       nic->tc_info.flow_ht_params));
+	kfree_rcu(flow_node, rcu);
+
+	clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap);
+	tc_info->num_entries--;
+
+	return 0;
+}
+
+static int otx2_tc_add_flow(struct otx2_nic *nic,
+			    struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct otx2_tc_flow *new_node, *old_node;
+	struct npc_install_flow_req *req;
+	int rc;
+
+	if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
+		return -ENOMEM;
+
+	/* allocate memory for the new flow and it's node */
+	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+	spin_lock_init(&new_node->lock);
+	new_node->cookie = tc_flow_cmd->cookie;
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	rc = otx2_tc_prepare_flow(nic, tc_flow_cmd, req);
+	if (rc) {
+		otx2_mbox_reset(&nic->mbox.mbox, 0);
+		mutex_unlock(&nic->mbox.lock);
+		return rc;
+	}
+
+	/* If a flow exists with the same cookie, delete it */
+	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					  &tc_flow_cmd->cookie,
+					  tc_info->flow_ht_params);
+	if (old_node)
+		otx2_tc_del_flow(nic, tc_flow_cmd);
+
+	if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
+		netdev_err(nic->netdev, "Not enough MCAM space to add the flow\n");
+		otx2_mbox_reset(&nic->mbox.mbox, 0);
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
+					       nic->flow_cfg->tc_max_flows);
+	req->channel = nic->hw.rx_chan_base;
+	req->entry = nic->flow_cfg->entry[nic->flow_cfg->tc_flower_offset +
+					  nic->flow_cfg->tc_max_flows - new_node->bitpos];
+	req->intf = NIX_INTF_RX;
+	req->set_cntr = 1;
+	new_node->entry = req->entry;
+
+	/* Send message to AF */
+	rc = otx2_sync_mbox_msg(&nic->mbox);
+	if (rc) {
+		netdev_err(nic->netdev, "Failed to install MCAM flow entry\n");
+		mutex_unlock(&nic->mbox.lock);
+		goto out;
+	}
+	mutex_unlock(&nic->mbox.lock);
+
+	/* add new flow to flow-table */
+	rc = rhashtable_insert_fast(&nic->tc_info.flow_table, &new_node->node,
+				    nic->tc_info.flow_ht_params);
+	if (rc) {
+		otx2_del_mcam_flow_entry(nic, req->entry);
+		kfree_rcu(new_node, rcu);
+		goto out;
+	}
+
+	set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
+	tc_info->num_entries++;
+out:
+	return rc;
+}
+
+static int otx2_tc_get_flow_stats(struct otx2_nic *nic,
+				  struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct npc_mcam_get_stats_req *req;
+	struct npc_mcam_get_stats_rsp *rsp;
+	struct otx2_tc_flow_stats *stats;
+	struct otx2_tc_flow *flow_node;
+	int err;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_info(nic->netdev, "tc flow not found for cookie %lx",
+			    tc_flow_cmd->cookie);
+		return -EINVAL;
+	}
+
+	mutex_lock(&nic->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_npc_mcam_entry_stats(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->entry = flow_node->entry;
+
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	if (err) {
+		netdev_err(nic->netdev, "Failed to get stats for MCAM flow entry %d\n",
+			   req->entry);
+		mutex_unlock(&nic->mbox.lock);
+		return -EFAULT;
+	}
+
+	rsp = (struct npc_mcam_get_stats_rsp *)otx2_mbox_get_rsp
+		(&nic->mbox.mbox, 0, &req->hdr);
+	if (IS_ERR(rsp)) {
+		mutex_unlock(&nic->mbox.lock);
+		return PTR_ERR(rsp);
+	}
+
+	mutex_unlock(&nic->mbox.lock);
+
+	if (!rsp->stat_ena)
+		return -EINVAL;
+
+	stats = &flow_node->stats;
+
+	spin_lock(&flow_node->lock);
+	flow_stats_update(&tc_flow_cmd->stats, 0x0, rsp->stat - stats->pkts, 0x0, 0x0,
+			  FLOW_ACTION_HW_STATS_IMMEDIATE);
+	stats->pkts = rsp->stat;
+	spin_unlock(&flow_node->lock);
+
+	return 0;
+}
+
+static int otx2_setup_tc_cls_flower(struct otx2_nic *nic,
+				    struct flow_cls_offload *cls_flower)
+{
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return otx2_tc_add_flow(nic, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return otx2_tc_del_flow(nic, cls_flower);
+	case FLOW_CLS_STATS:
+		return otx2_tc_get_flow_stats(nic, cls_flower);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
+{
+	struct otx2_nic *nic = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return otx2_setup_tc_cls_flower(nic, type_data);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int otx2_setup_tc_egress_matchall(struct otx2_nic *nic,
+					 struct tc_cls_matchall_offload *cls_matchall)
+{
+	switch (cls_matchall->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return otx2_tc_egress_matchall_install(nic, cls_matchall);
+	case TC_CLSMATCHALL_DESTROY:
+		return otx2_tc_egress_matchall_delete(nic, cls_matchall);
+	case TC_CLSMATCHALL_STATS:
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int otx2_setup_tc_block_egress_cb(enum tc_setup_type type,
+					 void *type_data, void *cb_priv)
+{
+	struct otx2_nic *nic = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return otx2_setup_tc_egress_matchall(nic, type_data);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static LIST_HEAD(otx2_block_cb_list);
+
+static int otx2_setup_tc_block(struct net_device *netdev,
+			       struct flow_block_offload *f)
+{
+	struct otx2_nic *nic = netdev_priv(netdev);
+	flow_setup_cb_t *cb;
+	bool ingress;
+
+	if (f->block_shared)
+		return -EOPNOTSUPP;
+
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+		cb = otx2_setup_tc_block_ingress_cb;
+		ingress = true;
+	} else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+		cb = otx2_setup_tc_block_egress_cb;
+		ingress = false;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return flow_block_cb_setup_simple(f, &otx2_block_cb_list, cb,
+					  nic, nic, ingress);
+}
+
+int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		  void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return otx2_setup_tc_block(netdev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct rhashtable_params tc_flow_ht_params = {
+	.head_offset = offsetof(struct otx2_tc_flow, node),
+	.key_offset = offsetof(struct otx2_tc_flow, cookie),
+	.key_len = sizeof(((struct otx2_tc_flow *)0)->cookie),
+	.automatic_shrinking = true,
+};
+
+int otx2_init_tc(struct otx2_nic *nic)
+{
+	struct otx2_tc_info *tc = &nic->tc_info;
+
+	tc->flow_ht_params = tc_flow_ht_params;
+	return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+}
+
+void otx2_shutdown_tc(struct otx2_nic *nic)
+{
+	struct otx2_tc_info *tc = &nic->tc_info;
+
+	rhashtable_destroy(&tc->flow_table);
+}
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index dbec8e187a68..2a752fb6b758 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4135,7 +4135,7 @@ static int sky2_set_coalesce(struct net_device *dev,
 /*
  * Hardware is limited to min of 128 and max of 2048 for ring size
  * and  rounded up to next power of two
- * to avoid division in modulus calclation
+ * to avoid division in modulus calculation
  */
 static unsigned long roundup_ring_size(unsigned long pending)
 {
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 3a777b4a6cd3..79d4cdbbcbf5 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,5 @@
 #
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
 obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 01d3ee4b5829..0396f0db855f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/pinctrl/devinfo.h>
 #include <linux/phylink.h>
+#include <net/dsa.h>
 
 #include "mtk_eth_soc.h"
 
@@ -1264,13 +1265,12 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			break;
 
 		/* find out which mac the packet come from. values start at 1 */
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) ||
+		    (trxd.rxd4 & RX_DMA_SPECIAL_TAG))
 			mac = 0;
-		} else {
-			mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
-				RX_DMA_FPORT_MASK;
-			mac--;
-		}
+		else
+			mac = ((trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+			       RX_DMA_FPORT_MASK) - 1;
 
 		if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
 			     !eth->netdev[mac]))
@@ -2233,6 +2233,9 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config)
 
 		val |= config;
 
+		if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0]))
+			val |= MTK_GDMA_SPECIAL_TAG;
+
 		mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
 	}
 	/* Reset and enable PSE */
@@ -2255,12 +2258,17 @@ static int mtk_open(struct net_device *dev)
 
 	/* we run 2 netdevs on the same dma ring so we only bring it up once */
 	if (!refcount_read(&eth->dma_refcnt)) {
-		int err = mtk_start_dma(eth);
+		u32 gdm_config = MTK_GDMA_TO_PDMA;
+		int err;
 
+		err = mtk_start_dma(eth);
 		if (err)
 			return err;
 
-		mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
+		if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+			gdm_config = MTK_GDMA_TO_PPE;
+
+		mtk_gdm_config(eth, gdm_config);
 
 		napi_enable(&eth->tx_napi);
 		napi_enable(&eth->rx_napi);
@@ -2327,6 +2335,9 @@ static int mtk_stop(struct net_device *dev)
 
 	mtk_dma_free(eth);
 
+	if (eth->soc->offload_version)
+		mtk_ppe_stop(&eth->ppe);
+
 	return 0;
 }
 
@@ -2832,6 +2843,7 @@ static const struct net_device_ops mtk_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= mtk_poll_controller,
 #endif
+	.ndo_setup_tc		= mtk_eth_setup_tc,
 };
 
 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
@@ -3088,6 +3100,17 @@ static int mtk_probe(struct platform_device *pdev)
 			goto err_free_dev;
 	}
 
+	if (eth->soc->offload_version) {
+		err = mtk_ppe_init(&eth->ppe, eth->dev,
+				   eth->base + MTK_ETH_PPE_BASE, 2);
+		if (err)
+			goto err_free_dev;
+
+		err = mtk_eth_offload_init(eth);
+		if (err)
+			goto err_free_dev;
+	}
+
 	for (i = 0; i < MTK_MAX_DEVS; i++) {
 		if (!eth->netdev[i])
 			continue;
@@ -3162,6 +3185,7 @@ static const struct mtk_soc_data mt7621_data = {
 	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7621_CLKS_BITMAP,
 	.required_pctl = false,
+	.offload_version = 2,
 };
 
 static const struct mtk_soc_data mt7622_data = {
@@ -3170,6 +3194,7 @@ static const struct mtk_soc_data mt7622_data = {
 	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7622_CLKS_BITMAP,
 	.required_pctl = false,
+	.offload_version = 2,
 };
 
 static const struct mtk_soc_data mt7623_data = {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index fd3cec8f06ba..1a6750c08bb9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/phylink.h>
+#include <linux/rhashtable.h>
+#include "mtk_ppe.h"
 
 #define MTK_QDMA_PAGE_SIZE	2048
 #define MTK_MAX_RX_LENGTH	1536
@@ -40,7 +42,8 @@
 				 NETIF_F_HW_VLAN_CTAG_RX | \
 				 NETIF_F_SG | NETIF_F_TSO | \
 				 NETIF_F_TSO6 | \
-				 NETIF_F_IPV6_CSUM)
+				 NETIF_F_IPV6_CSUM |\
+				 NETIF_F_HW_TC)
 #define MTK_HW_FEATURES_MT7628	(NETIF_F_SG | NETIF_F_RXCSUM)
 #define NEXT_DESP_IDX(X, Y)	(((X) + 1) & ((Y) - 1))
 
@@ -82,10 +85,12 @@
 
 /* GDM Exgress Control Register */
 #define MTK_GDMA_FWD_CFG(x)	(0x500 + (x * 0x1000))
+#define MTK_GDMA_SPECIAL_TAG	BIT(24)
 #define MTK_GDMA_ICS_EN		BIT(22)
 #define MTK_GDMA_TCS_EN		BIT(21)
 #define MTK_GDMA_UCS_EN		BIT(20)
 #define MTK_GDMA_TO_PDMA	0x0
+#define MTK_GDMA_TO_PPE		0x4444
 #define MTK_GDMA_DROP_ALL       0x7777
 
 /* Unicast Filter MAC Address Register - Low */
@@ -301,10 +306,17 @@
 #define RX_DMA_VID(_x)		((_x) & 0xfff)
 
 /* QDMA descriptor rxd4 */
+#define MTK_RXD4_FOE_ENTRY	GENMASK(13, 0)
+#define MTK_RXD4_PPE_CPU_REASON	GENMASK(18, 14)
+#define MTK_RXD4_SRC_PORT	GENMASK(21, 19)
+#define MTK_RXD4_ALG		GENMASK(31, 22)
+
+/* QDMA descriptor rxd4 */
 #define RX_DMA_L4_VALID		BIT(24)
 #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
 #define RX_DMA_FPORT_SHIFT	19
 #define RX_DMA_FPORT_MASK	0x7
+#define RX_DMA_SPECIAL_TAG	BIT(22)
 
 /* PHY Indirect Access Control registers */
 #define MTK_PHY_IAC		0x10004
@@ -802,6 +814,7 @@ struct mtk_soc_data {
 	u32		caps;
 	u32		required_clks;
 	bool		required_pctl;
+	u8		offload_version;
 	netdev_features_t hw_features;
 };
 
@@ -901,6 +914,9 @@ struct mtk_eth {
 	u32				tx_int_status_reg;
 	u32				rx_dma_l4_valid;
 	int				ip_align;
+
+	struct mtk_ppe			ppe;
+	struct rhashtable		flow_table;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
@@ -945,4 +961,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id);
 int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
 int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
 
+int mtk_eth_offload_init(struct mtk_eth *eth);
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data);
+
+
 #endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
new file mode 100644
index 000000000000..a1a9959a2461
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include "mtk_ppe.h"
+#include "mtk_ppe_regs.h"
+
+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	writel(val, ppe->base + reg);
+}
+
+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
+{
+	return readl(ppe->base + reg);
+}
+
+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
+{
+	u32 val;
+
+	val = ppe_r32(ppe, reg);
+	val &= ~mask;
+	val |= set;
+	ppe_w32(ppe, reg, val);
+
+	return val;
+}
+
+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	return ppe_m32(ppe, reg, 0, val);
+}
+
+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	return ppe_m32(ppe, reg, val, 0);
+}
+
+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+{
+	unsigned long timeout = jiffies + HZ;
+
+	while (time_is_before_jiffies(timeout)) {
+		if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
+			return 0;
+
+		usleep_range(10, 20);
+	}
+
+	dev_err(ppe->dev, "PPE table busy");
+
+	return -ETIMEDOUT;
+}
+
+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
+{
+	ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+	ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+}
+
+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
+{
+	mtk_ppe_cache_clear(ppe);
+
+	ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
+		enable * MTK_PPE_CACHE_CTL_EN);
+}
+
+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+{
+	u32 hv1, hv2, hv3;
+	u32 hash;
+
+	switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+		case MTK_PPE_PKT_TYPE_BRIDGE:
+			hv1 = e->bridge.src_mac_lo;
+			hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+			hv2 = e->bridge.src_mac_hi >> 16;
+			hv2 ^= e->bridge.dest_mac_lo;
+			hv3 = e->bridge.dest_mac_hi;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+			hv1 = e->ipv4.orig.ports;
+			hv2 = e->ipv4.orig.dest_ip;
+			hv3 = e->ipv4.orig.src_ip;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+			hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
+			hv1 ^= e->ipv6.ports;
+
+			hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
+			hv2 ^= e->ipv6.dest_ip[0];
+
+			hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
+			hv3 ^= e->ipv6.src_ip[0];
+			break;
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+		case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		default:
+			WARN_ON_ONCE(1);
+			return MTK_PPE_HASH_MASK;
+	}
+
+	hash = (hv1 & hv2) | ((~hv1) & hv3);
+	hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+	hash ^= hv1 ^ hv2 ^ hv3;
+	hash ^= hash >> 16;
+	hash <<= 1;
+	hash &= MTK_PPE_ENTRIES - 1;
+
+	return hash;
+}
+
+static inline struct mtk_foe_mac_info *
+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+		return &entry->ipv6.l2;
+
+	return &entry->ipv4.l2;
+}
+
+static inline u32 *
+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+		return &entry->ipv6.ib2;
+
+	return &entry->ipv4.ib2;
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+			  u8 pse_port, u8 *src_mac, u8 *dest_mac)
+{
+	struct mtk_foe_mac_info *l2;
+	u32 ports_pad, val;
+
+	memset(entry, 0, sizeof(*entry));
+
+	val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+	      FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+	      FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+	      MTK_FOE_IB1_BIND_TTL |
+	      MTK_FOE_IB1_BIND_CACHE;
+	entry->ib1 = val;
+
+	val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+	      FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
+	      FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+
+	if (is_multicast_ether_addr(dest_mac))
+		val |= MTK_FOE_IB2_MULTICAST;
+
+	ports_pad = 0xa5a5a500 | (l4proto & 0xff);
+	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+		entry->ipv4.orig.ports = ports_pad;
+	if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+		entry->ipv6.ports = ports_pad;
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+		entry->ipv6.ib2 = val;
+		l2 = &entry->ipv6.l2;
+	} else {
+		entry->ipv4.ib2 = val;
+		l2 = &entry->ipv4.l2;
+	}
+
+	l2->dest_mac_hi = get_unaligned_be32(dest_mac);
+	l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
+	l2->src_mac_hi = get_unaligned_be32(src_mac);
+	l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+		l2->etype = ETH_P_IPV6;
+	else
+		l2->etype = ETH_P_IP;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
+{
+	u32 *ib2 = mtk_foe_entry_ib2(entry);
+	u32 val;
+
+	val = *ib2;
+	val &= ~MTK_FOE_IB2_DEST_PORT;
+	val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
+	*ib2 = val;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
+				 __be32 src_addr, __be16 src_port,
+				 __be32 dest_addr, __be16 dest_port)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+	struct mtk_ipv4_tuple *t;
+
+	switch (type) {
+	case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		if (egress) {
+			t = &entry->ipv4.new;
+			break;
+		}
+		fallthrough;
+	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+	case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+		t = &entry->ipv4.orig;
+		break;
+	case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
+		entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
+		return 0;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	t->src_ip = be32_to_cpu(src_addr);
+	t->dest_ip = be32_to_cpu(dest_addr);
+
+	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+		return 0;
+
+	t->src_port = be16_to_cpu(src_port);
+	t->dest_port = be16_to_cpu(dest_port);
+
+	return 0;
+}
+
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+				 __be32 *src_addr, __be16 src_port,
+				 __be32 *dest_addr, __be16 dest_port)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+	u32 *src, *dest;
+	int i;
+
+	switch (type) {
+	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+		src = entry->dslite.tunnel_src_ip;
+		dest = entry->dslite.tunnel_dest_ip;
+		break;
+	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+	case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		entry->ipv6.src_port = be16_to_cpu(src_port);
+		entry->ipv6.dest_port = be16_to_cpu(dest_port);
+		fallthrough;
+	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		src = entry->ipv6.src_ip;
+		dest = entry->ipv6.dest_ip;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	};
+
+	for (i = 0; i < 4; i++)
+		src[i] = be32_to_cpu(src_addr[i]);
+	for (i = 0; i < 4; i++)
+		dest[i] = be32_to_cpu(dest_addr[i]);
+
+	return 0;
+}
+
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	l2->etype = BIT(port);
+
+	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
+		entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+	else
+		l2->etype |= BIT(8);
+
+	entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
+	case 0:
+		entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
+			      FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+		l2->vlan1 = vid;
+		return 0;
+	case 1:
+		if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
+			l2->vlan1 = vid;
+			l2->etype |= BIT(8);
+		} else {
+			l2->vlan2 = vid;
+			entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+		}
+		return 0;
+	default:
+		return -ENOSPC;
+	}
+}
+
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
+	    (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
+		l2->etype = ETH_P_PPP_SES;
+
+	entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
+	l2->pppoe_id = sid;
+
+	return 0;
+}
+
+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+{
+	return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+	       FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+			 u16 timestamp)
+{
+	struct mtk_foe_entry *hwe;
+	u32 hash;
+
+	timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+	entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+	entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+
+	hash = mtk_ppe_hash_entry(entry);
+	hwe = &ppe->foe_table[hash];
+	if (!mtk_foe_entry_usable(hwe)) {
+		hwe++;
+		hash++;
+
+		if (!mtk_foe_entry_usable(hwe))
+			return -ENOSPC;
+	}
+
+	memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+	wmb();
+	hwe->ib1 = entry->ib1;
+
+	dma_wmb();
+
+	mtk_ppe_cache_clear(ppe);
+
+	return hash;
+}
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+		 int version)
+{
+	struct mtk_foe_entry *foe;
+
+	/* need to allocate a separate device, since it PPE DMA access is
+	 * not coherent.
+	 */
+	ppe->base = base;
+	ppe->dev = dev;
+	ppe->version = version;
+
+	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+				  &ppe->foe_phys, GFP_KERNEL);
+	if (!foe)
+		return -ENOMEM;
+
+	ppe->foe_table = foe;
+
+	mtk_ppe_debugfs_init(ppe);
+
+	return 0;
+}
+
+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+{
+	static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+	int i, k;
+
+	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
+
+	if (!IS_ENABLED(CONFIG_SOC_MT7621))
+		return;
+
+	/* skip all entries that cross the 1024 byte boundary */
+	for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
+		for (k = 0; k < ARRAY_SIZE(skip); k++)
+			ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
+}
+
+int mtk_ppe_start(struct mtk_ppe *ppe)
+{
+	u32 val;
+
+	mtk_ppe_init_foe_table(ppe);
+	ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
+
+	val = MTK_PPE_TB_CFG_ENTRY_80B |
+	      MTK_PPE_TB_CFG_AGE_NON_L4 |
+	      MTK_PPE_TB_CFG_AGE_UNBIND |
+	      MTK_PPE_TB_CFG_AGE_TCP |
+	      MTK_PPE_TB_CFG_AGE_UDP |
+	      MTK_PPE_TB_CFG_AGE_TCP_FIN |
+	      FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+			 MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+			 MTK_PPE_KEEPALIVE_DISABLE) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
+			 MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
+			 MTK_PPE_ENTRIES_SHIFT);
+	ppe_w32(ppe, MTK_PPE_TB_CFG, val);
+
+	ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
+		MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
+
+	mtk_ppe_cache_enable(ppe, true);
+
+	val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
+	      MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+	      MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+	      MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+	      MTK_PPE_FLOW_CFG_IP6_6RD |
+	      MTK_PPE_FLOW_CFG_IP4_NAT |
+	      MTK_PPE_FLOW_CFG_IP4_NAPT |
+	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
+	      MTK_PPE_FLOW_CFG_L2_BRIDGE |
+	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+
+	val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+	      FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
+	ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
+	      FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
+	      FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
+	ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
+
+	val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
+	ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
+
+	val = MTK_PPE_BIND_LIMIT1_FULL |
+	      FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
+	      FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
+
+	/* enable PPE */
+	val = MTK_PPE_GLO_CFG_EN |
+	      MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
+	      MTK_PPE_GLO_CFG_IP4_CS_DROP |
+	      MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
+	ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
+
+	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+
+	return 0;
+}
+
+int mtk_ppe_stop(struct mtk_ppe *ppe)
+{
+	u32 val;
+	int i;
+
+	for (i = 0; i < MTK_PPE_ENTRIES; i++)
+		ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
+						   MTK_FOE_STATE_INVALID);
+
+	mtk_ppe_cache_enable(ppe, false);
+
+	/* disable offload engine */
+	ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
+	ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
+
+	/* disable aging */
+	val = MTK_PPE_TB_CFG_AGE_NON_L4 |
+	      MTK_PPE_TB_CFG_AGE_UNBIND |
+	      MTK_PPE_TB_CFG_AGE_TCP |
+	      MTK_PPE_TB_CFG_AGE_UDP |
+	      MTK_PPE_TB_CFG_AGE_TCP_FIN;
+	ppe_clear(ppe, MTK_PPE_TB_CFG, val);
+
+	return mtk_ppe_wait_busy(ppe);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
new file mode 100644
index 000000000000..51bd5e75bbbd
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_H
+#define __MTK_PPE_H
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+
+#define MTK_ETH_PPE_BASE		0xc00
+
+#define MTK_PPE_ENTRIES_SHIFT		3
+#define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
+#define MTK_PPE_HASH_MASK		(MTK_PPE_ENTRIES - 1)
+
+#define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
+#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
+#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(24)
+
+#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(14, 0)
+#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(15)
+#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(18, 16)
+#define MTK_FOE_IB1_BIND_PPPOE		BIT(19)
+#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(20)
+#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(21)
+#define MTK_FOE_IB1_BIND_CACHE		BIT(22)
+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(23)
+#define MTK_FOE_IB1_BIND_TTL		BIT(24)
+
+#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 25)
+#define MTK_FOE_IB1_STATE		GENMASK(29, 28)
+#define MTK_FOE_IB1_UDP			BIT(30)
+#define MTK_FOE_IB1_STATIC		BIT(31)
+
+enum {
+	MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
+	MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
+	MTK_PPE_PKT_TYPE_BRIDGE = 2,
+	MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
+	MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
+	MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
+	MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+};
+
+#define MTK_FOE_IB2_QID			GENMASK(3, 0)
+#define MTK_FOE_IB2_PSE_QOS		BIT(4)
+#define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
+#define MTK_FOE_IB2_MULTICAST		BIT(8)
+
+#define MTK_FOE_IB2_WHNAT_QID2		GENMASK(13, 12)
+#define MTK_FOE_IB2_WHNAT_DEVIDX	BIT(16)
+#define MTK_FOE_IB2_WHNAT_NAT		BIT(17)
+
+#define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
+
+#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 18)
+
+#define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
+
+#define MTK_FOE_VLAN2_WHNAT_BSS		GEMMASK(5, 0)
+#define MTK_FOE_VLAN2_WHNAT_WCID	GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WHNAT_RING	GENMASK(15, 14)
+
+enum {
+	MTK_FOE_STATE_INVALID,
+	MTK_FOE_STATE_UNBIND,
+	MTK_FOE_STATE_BIND,
+	MTK_FOE_STATE_FIN
+};
+
+struct mtk_foe_mac_info {
+	u16 vlan1;
+	u16 etype;
+
+	u32 dest_mac_hi;
+
+	u16 vlan2;
+	u16 dest_mac_lo;
+
+	u32 src_mac_hi;
+
+	u16 pppoe_id;
+	u16 src_mac_lo;
+};
+
+struct mtk_foe_bridge {
+	u32 dest_mac_hi;
+
+	u16 src_mac_lo;
+	u16 dest_mac_lo;
+
+	u32 src_mac_hi;
+
+	u32 ib2;
+
+	u32 _rsv[5];
+
+	u32 udf_tsid;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_ipv4_tuple {
+	u32 src_ip;
+	u32 dest_ip;
+	union {
+		struct {
+			u16 dest_port;
+			u16 src_port;
+		};
+		struct {
+			u8 protocol;
+			u8 _pad[3]; /* fill with 0xa5a5a5 */
+		};
+		u32 ports;
+	};
+};
+
+struct mtk_foe_ipv4 {
+	struct mtk_ipv4_tuple orig;
+
+	u32 ib2;
+
+	struct mtk_ipv4_tuple new;
+
+	u16 timestamp;
+	u16 _rsv0[3];
+
+	u32 udf_tsid;
+
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv4_dslite {
+	struct mtk_ipv4_tuple ip4;
+
+	u32 tunnel_src_ip[4];
+	u32 tunnel_dest_ip[4];
+
+	u8 flow_label[3];
+	u8 priority;
+
+	u32 udf_tsid;
+
+	u32 ib2;
+
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6 {
+	u32 src_ip[4];
+	u32 dest_ip[4];
+
+	union {
+		struct {
+			u8 protocol;
+			u8 _pad[3]; /* fill with 0xa5a5a5 */
+		}; /* 3-tuple */
+		struct {
+			u16 dest_port;
+			u16 src_port;
+		}; /* 5-tuple */
+		u32 ports;
+	};
+
+	u32 _rsv[3];
+
+	u32 udf;
+
+	u32 ib2;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6_6rd {
+	u32 src_ip[4];
+	u32 dest_ip[4];
+	u16 dest_port;
+	u16 src_port;
+
+	u32 tunnel_src_ip;
+	u32 tunnel_dest_ip;
+
+	u16 hdr_csum;
+	u8 dscp;
+	u8 ttl;
+
+	u8 flag;
+	u8 pad;
+	u8 per_flow_6rd_id;
+	u8 pad2;
+
+	u32 ib2;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_entry {
+	u32 ib1;
+
+	union {
+		struct mtk_foe_bridge bridge;
+		struct mtk_foe_ipv4 ipv4;
+		struct mtk_foe_ipv4_dslite dslite;
+		struct mtk_foe_ipv6 ipv6;
+		struct mtk_foe_ipv6_6rd ipv6_6rd;
+		u32 data[19];
+	};
+};
+
+enum {
+	MTK_PPE_CPU_REASON_TTL_EXCEEDED			= 0x02,
+	MTK_PPE_CPU_REASON_OPTION_HEADER		= 0x03,
+	MTK_PPE_CPU_REASON_NO_FLOW			= 0x07,
+	MTK_PPE_CPU_REASON_IPV4_FRAG			= 0x08,
+	MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG		= 0x09,
+	MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP	= 0x0a,
+	MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP		= 0x0b,
+	MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST		= 0x0c,
+	MTK_PPE_CPU_REASON_UN_HIT			= 0x0d,
+	MTK_PPE_CPU_REASON_HIT_UNBIND			= 0x0e,
+	MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED	= 0x0f,
+	MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN		= 0x10,
+	MTK_PPE_CPU_REASON_HIT_TTL_1			= 0x11,
+	MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION	= 0x12,
+	MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR		= 0x13,
+	MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR		= 0x14,
+	MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR	= 0x15,
+	MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU		= 0x16,
+	MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER		= 0x17,
+	MTK_PPE_CPU_REASON_MULTICAST_TO_CPU		= 0x18,
+	MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU	= 0x19,
+	MTK_PPE_CPU_REASON_HIT_PRE_BIND			= 0x1a,
+	MTK_PPE_CPU_REASON_PACKET_SAMPLING		= 0x1b,
+	MTK_PPE_CPU_REASON_EXCEED_MTU			= 0x1c,
+	MTK_PPE_CPU_REASON_PPE_BYPASS			= 0x1e,
+	MTK_PPE_CPU_REASON_INVALID			= 0x1f,
+};
+
+struct mtk_ppe {
+	struct device *dev;
+	void __iomem *base;
+	int version;
+
+	struct mtk_foe_entry *foe_table;
+	dma_addr_t foe_phys;
+
+	void *acct_table;
+};
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+		 int version);
+int mtk_ppe_start(struct mtk_ppe *ppe);
+int mtk_ppe_stop(struct mtk_ppe *ppe);
+
+static inline void
+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+{
+	ppe->foe_table[hash].ib1 = 0;
+	dma_wmb();
+}
+
+static inline int
+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
+{
+	u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
+
+	if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
+		return -1;
+
+	return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+			  u8 pse_port, u8 *src_mac, u8 *dest_mac);
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
+				 __be32 src_addr, __be16 src_port,
+				 __be32 dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+				 __be32 *src_addr, __be16 src_port,
+				 __be32 *dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+			 u16 timestamp);
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
new file mode 100644
index 000000000000..8ae9efab6d02
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_addr_info
+{
+	void *src, *dest;
+	u16 *src_port, *dest_port;
+	bool ipv6;
+};
+
+static const char *mtk_foe_entry_state_str(int state)
+{
+	static const char * const state_str[] = {
+		[MTK_FOE_STATE_INVALID] = "INV",
+		[MTK_FOE_STATE_UNBIND] = "UNB",
+		[MTK_FOE_STATE_BIND] = "BND",
+		[MTK_FOE_STATE_FIN] = "FIN",
+	};
+
+	if (state >= ARRAY_SIZE(state_str) || !state_str[state])
+		return "UNK";
+
+	return state_str[state];
+}
+
+static const char *mtk_foe_pkt_type_str(int type)
+{
+	static const char * const type_str[] = {
+		[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+		[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+		[MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+		[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+		[MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
+	};
+
+	if (type >= ARRAY_SIZE(type_str) || !type_str[type])
+		return "UNKNOWN";
+
+	return type_str[type];
+}
+
+static void
+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
+{
+	u32 n_addr[4];
+	int i;
+
+	if (!ipv6) {
+		seq_printf(m, "%pI4h", addr);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(n_addr); i++)
+		n_addr[i] = htonl(addr[i]);
+	seq_printf(m, "%pI6", n_addr);
+}
+
+static void
+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+{
+	mtk_print_addr(m, ai->src, ai->ipv6);
+	if (ai->src_port)
+		seq_printf(m, ":%d", *ai->src_port);
+	seq_printf(m, "->");
+	mtk_print_addr(m, ai->dest, ai->ipv6);
+	if (ai->dest_port)
+		seq_printf(m, ":%d", *ai->dest_port);
+}
+
+static int
+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+{
+	struct mtk_ppe *ppe = m->private;
+	int i, count;
+
+	for (i = 0, count = 0; i < MTK_PPE_ENTRIES; i++) {
+		struct mtk_foe_entry *entry = &ppe->foe_table[i];
+		struct mtk_foe_mac_info *l2;
+		struct mtk_flow_addr_info ai = {};
+		unsigned char h_source[ETH_ALEN];
+		unsigned char h_dest[ETH_ALEN];
+		int type, state;
+		u32 ib2;
+
+
+		state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
+		if (!state)
+			continue;
+
+		if (bind && state != MTK_FOE_STATE_BIND)
+			continue;
+
+		type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+		seq_printf(m, "%05x %s %7s", i,
+			   mtk_foe_entry_state_str(state),
+			   mtk_foe_pkt_type_str(type));
+
+		switch (type) {
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+			ai.src_port = &entry->ipv4.orig.src_port;
+			ai.dest_port = &entry->ipv4.orig.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+			ai.src = &entry->ipv4.orig.src_ip;
+			ai.dest = &entry->ipv4.orig.dest_ip;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+			ai.src_port = &entry->ipv6.src_port;
+			ai.dest_port = &entry->ipv6.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		case MTK_PPE_PKT_TYPE_IPV6_6RD:
+			ai.src = &entry->ipv6.src_ip;
+			ai.dest = &entry->ipv6.dest_ip;
+			ai.ipv6 = true;
+			break;
+		}
+
+		seq_printf(m, " orig=");
+		mtk_print_addr_info(m, &ai);
+
+		switch (type) {
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+			ai.src_port = &entry->ipv4.new.src_port;
+			ai.dest_port = &entry->ipv4.new.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+			ai.src = &entry->ipv4.new.src_ip;
+			ai.dest = &entry->ipv4.new.dest_ip;
+			seq_printf(m, " new=");
+			mtk_print_addr_info(m, &ai);
+			break;
+		}
+
+		if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+			l2 = &entry->ipv6.l2;
+			ib2 = entry->ipv6.ib2;
+		} else {
+			l2 = &entry->ipv4.l2;
+			ib2 = entry->ipv4.ib2;
+		}
+
+		*((__be32 *)h_source) = htonl(l2->src_mac_hi);
+		*((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
+		*((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
+		*((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
+
+		seq_printf(m, " eth=%pM->%pM etype=%04x"
+			      " vlan=%d,%d ib1=%08x ib2=%08x\n",
+			   h_source, h_dest, ntohs(l2->etype),
+			   l2->vlan1, l2->vlan2, entry->ib1, ib2);
+	}
+
+	return 0;
+}
+
+static int
+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+{
+	return mtk_ppe_debugfs_foe_show(m, private, false);
+}
+
+static int
+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+{
+	return mtk_ppe_debugfs_foe_show(m, private, true);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtk_ppe_debugfs_foe_show_all,
+			   inode->i_private);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtk_ppe_debugfs_foe_show_bind,
+			   inode->i_private);
+}
+
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+{
+	static const struct file_operations fops_all = {
+		.open = mtk_ppe_debugfs_foe_open_all,
+		.read = seq_read,
+		.llseek = seq_lseek,
+		.release = single_release,
+	};
+
+	static const struct file_operations fops_bind = {
+		.open = mtk_ppe_debugfs_foe_open_bind,
+		.read = seq_read,
+		.llseek = seq_lseek,
+		.release = single_release,
+	};
+
+	struct dentry *root;
+
+	root = debugfs_create_dir("mtk_ppe", NULL);
+	if (!root)
+		return -ENOMEM;
+
+	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
new file mode 100644
index 000000000000..d0c46786571f
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <net/flow_offload.h>
+#include <net/pkt_cls.h>
+#include <net/dsa.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_data {
+	struct ethhdr eth;
+
+	union {
+		struct {
+			__be32 src_addr;
+			__be32 dst_addr;
+		} v4;
+	};
+
+	__be16 src_port;
+	__be16 dst_port;
+
+	struct {
+		u16 id;
+		__be16 proto;
+		u8 num;
+	} vlan;
+	struct {
+		u16 sid;
+		u8 num;
+	} pppoe;
+};
+
+struct mtk_flow_entry {
+	struct rhash_head node;
+	unsigned long cookie;
+	u16 hash;
+};
+
+static const struct rhashtable_params mtk_flow_ht_params = {
+	.head_offset = offsetof(struct mtk_flow_entry, node),
+	.head_offset = offsetof(struct mtk_flow_entry, cookie),
+	.key_len = sizeof(unsigned long),
+	.automatic_shrinking = true,
+};
+
+static u32
+mtk_eth_timestamp(struct mtk_eth *eth)
+{
+	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+}
+
+static int
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
+		       bool egress)
+{
+	return mtk_foe_entry_set_ipv4_tuple(foe, egress,
+					    data->v4.src_addr, data->src_port,
+					    data->v4.dst_addr, data->dst_port);
+}
+
+static void
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
+{
+	void *dest = eth + act->mangle.offset;
+	const void *src = &act->mangle.val;
+
+	if (act->mangle.offset > 8)
+		return;
+
+	if (act->mangle.mask == 0xffff) {
+		src += 2;
+		dest += 2;
+	}
+
+	memcpy(dest, src, act->mangle.mask ? 2 : 4);
+}
+
+
+static int
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
+		      struct mtk_flow_data *data)
+{
+	u32 val = ntohl(act->mangle.val);
+
+	switch (act->mangle.offset) {
+	case 0:
+		if (act->mangle.mask == ~htonl(0xffff))
+			data->dst_port = cpu_to_be16(val);
+		else
+			data->src_port = cpu_to_be16(val >> 16);
+		break;
+	case 2:
+		data->dst_port = cpu_to_be16(val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
+		     struct mtk_flow_data *data)
+{
+	__be32 *dest;
+
+	switch (act->mangle.offset) {
+	case offsetof(struct iphdr, saddr):
+		dest = &data->v4.src_addr;
+		break;
+	case offsetof(struct iphdr, daddr):
+		dest = &data->v4.dst_addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memcpy(dest, &act->mangle.val, sizeof(u32));
+
+	return 0;
+}
+
+static int
+mtk_flow_get_dsa_port(struct net_device **dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+	struct dsa_port *dp;
+
+	dp = dsa_port_from_netdev(*dev);
+	if (IS_ERR(dp))
+		return -ENODEV;
+
+	if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
+		return -ENODEV;
+
+	*dev = dp->cpu_dp->master;
+
+	return dp->index;
+#else
+	return -ENODEV;
+#endif
+}
+
+static int
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+			   struct net_device *dev)
+{
+	int pse_port, dsa_port;
+
+	dsa_port = mtk_flow_get_dsa_port(&dev);
+	if (dsa_port >= 0)
+		mtk_foe_entry_set_dsa(foe, dsa_port);
+
+	if (dev == eth->netdev[0])
+		pse_port = 1;
+	else if (dev == eth->netdev[1])
+		pse_port = 2;
+	else
+		return -EOPNOTSUPP;
+
+	mtk_foe_entry_set_pse_port(foe, pse_port);
+
+	return 0;
+}
+
+static int
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_action_entry *act;
+	struct mtk_flow_data data = {};
+	struct mtk_foe_entry foe;
+	struct net_device *odev = NULL;
+	struct mtk_flow_entry *entry;
+	int offload_type = 0;
+	u16 addr_type = 0;
+	u32 timestamp;
+	u8 l4proto = 0;
+	int err = 0;
+	int hash;
+	int i;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+		struct flow_match_meta match;
+
+		flow_rule_match_meta(rule, &match);
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		l4proto = match.key->ip_proto;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	flow_action_for_each(i, act, &rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_MANGLE:
+			if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+				mtk_flow_offload_mangle_eth(act, &data.eth);
+			break;
+		case FLOW_ACTION_REDIRECT:
+			odev = act->dev;
+			break;
+		case FLOW_ACTION_CSUM:
+			break;
+		case FLOW_ACTION_VLAN_PUSH:
+			if (data.vlan.num == 1 ||
+			    act->vlan.proto != htons(ETH_P_8021Q))
+				return -EOPNOTSUPP;
+
+			data.vlan.id = act->vlan.vid;
+			data.vlan.proto = act->vlan.proto;
+			data.vlan.num++;
+			break;
+		case FLOW_ACTION_PPPOE_PUSH:
+			if (data.pppoe.num == 1)
+				return -EOPNOTSUPP;
+
+			data.pppoe.sid = act->pppoe.sid;
+			data.pppoe.num++;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	switch (addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (!is_valid_ether_addr(data.eth.h_source) ||
+	    !is_valid_ether_addr(data.eth.h_dest))
+		return -EINVAL;
+
+	err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
+				    data.eth.h_source,
+				    data.eth.h_dest);
+	if (err)
+		return err;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports ports;
+
+		flow_rule_match_ports(rule, &ports);
+		data.src_port = ports.key->src;
+		data.dst_port = ports.key->dst;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs addrs;
+
+		flow_rule_match_ipv4_addrs(rule, &addrs);
+
+		data.v4.src_addr = addrs.key->src;
+		data.v4.dst_addr = addrs.key->dst;
+
+		mtk_flow_set_ipv4_addr(&foe, &data, false);
+	}
+
+	flow_action_for_each(i, act, &rule->action) {
+		if (act->id != FLOW_ACTION_MANGLE)
+			continue;
+
+		switch (act->mangle.htype) {
+		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+			err = mtk_flow_mangle_ports(act, &data);
+			break;
+		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+			err = mtk_flow_mangle_ipv4(act, &data);
+			break;
+		case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+			/* handled earlier */
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+
+		if (err)
+			return err;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		err = mtk_flow_set_ipv4_addr(&foe, &data, true);
+		if (err)
+			return err;
+	}
+
+	if (data.vlan.num == 1) {
+		if (data.vlan.proto != htons(ETH_P_8021Q))
+			return -EOPNOTSUPP;
+
+		mtk_foe_entry_set_vlan(&foe, data.vlan.id);
+	}
+	if (data.pppoe.num == 1)
+		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+
+	err = mtk_flow_set_output_device(eth, &foe, odev);
+	if (err)
+		return err;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->cookie = f->cookie;
+	timestamp = mtk_eth_timestamp(eth);
+	hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
+	if (hash < 0) {
+		err = hash;
+		goto free;
+	}
+
+	entry->hash = hash;
+	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
+				     mtk_flow_ht_params);
+	if (err < 0)
+		goto clear_flow;
+
+	return 0;
+clear_flow:
+	mtk_foe_entry_clear(&eth->ppe, hash);
+free:
+	kfree(entry);
+	return err;
+}
+
+static int
+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct mtk_flow_entry *entry;
+
+	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+				  mtk_flow_ht_params);
+	if (!entry)
+		return -ENOENT;
+
+	mtk_foe_entry_clear(&eth->ppe, entry->hash);
+	rhashtable_remove_fast(&eth->flow_table, &entry->node,
+			       mtk_flow_ht_params);
+	kfree(entry);
+
+	return 0;
+}
+
+static int
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct mtk_flow_entry *entry;
+	int timestamp;
+	u32 idle;
+
+	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+				  mtk_flow_ht_params);
+	if (!entry)
+		return -ENOENT;
+
+	timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
+	if (timestamp < 0)
+		return -ETIMEDOUT;
+
+	idle = mtk_eth_timestamp(eth) - timestamp;
+	f->stats.lastused = jiffies - idle * HZ;
+
+	return 0;
+}
+
+static int
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+	struct flow_cls_offload *cls = type_data;
+	struct net_device *dev = cb_priv;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+
+	if (!tc_can_offload(dev))
+		return -EOPNOTSUPP;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	switch (cls->command) {
+	case FLOW_CLS_REPLACE:
+		return mtk_flow_offload_replace(eth, cls);
+	case FLOW_CLS_DESTROY:
+		return mtk_flow_offload_destroy(eth, cls);
+	case FLOW_CLS_STATS:
+		return mtk_flow_offload_stats(eth, cls);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int
+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	static LIST_HEAD(block_cb_list);
+	struct flow_block_cb *block_cb;
+	flow_setup_cb_t *cb;
+
+	if (!eth->ppe.foe_table)
+		return -EOPNOTSUPP;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	cb = mtk_eth_setup_tc_block_cb;
+	f->driver_block_list = &block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
+		if (block_cb) {
+			flow_block_cb_incref(block_cb);
+			return 0;
+		}
+		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &block_cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
+		if (!block_cb)
+			return -ENOENT;
+
+		if (flow_block_cb_decref(block_cb)) {
+			flow_block_cb_remove(block_cb, f);
+			list_del(&block_cb->driver_list);
+		}
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data)
+{
+	if (type == TC_SETUP_FT)
+		return mtk_eth_setup_tc_block(dev, type_data);
+
+	return -EOPNOTSUPP;
+}
+
+int mtk_eth_offload_init(struct mtk_eth *eth)
+{
+	if (!eth->ppe.foe_table)
+		return 0;
+
+	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
new file mode 100644
index 000000000000..0c45ea0900f1
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_REGS_H
+#define __MTK_PPE_REGS_H
+
+#define MTK_PPE_GLO_CFG				0x200
+#define MTK_PPE_GLO_CFG_EN			BIT(0)
+#define MTK_PPE_GLO_CFG_TSID_EN			BIT(1)
+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP		BIT(2)
+#define MTK_PPE_GLO_CFG_IP4_CS_DROP		BIT(3)
+#define MTK_PPE_GLO_CFG_TTL0_DROP		BIT(4)
+#define MTK_PPE_GLO_CFG_PPE_BSWAP		BIT(5)
+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS		BIT(6)
+#define MTK_PPE_GLO_CFG_MCAST_TB_EN		BIT(7)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA		BIT(8)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE	BIT(9)
+#define MTK_PPE_GLO_CFG_UDP_LITE_EN		BIT(10)
+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP		BIT(11)
+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES		GNEMASK(13, 12)
+#define MTK_PPE_GLO_CFG_BUSY			BIT(31)
+
+#define MTK_PPE_FLOW_CFG			0x204
+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG		BIT(6)
+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG		BIT(7)
+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE		BIT(8)
+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE		BIT(9)
+#define MTK_PPE_FLOW_CFG_IP6_6RD		BIT(10)
+#define MTK_PPE_FLOW_CFG_IP4_NAT		BIT(12)
+#define MTK_PPE_FLOW_CFG_IP4_NAPT		BIT(13)
+#define MTK_PPE_FLOW_CFG_IP4_DSLITE		BIT(14)
+#define MTK_PPE_FLOW_CFG_L2_BRIDGE		BIT(15)
+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST	BIT(16)
+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG		BIT(17)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL	BIT(18)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY	BIT(19)
+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY	BIT(20)
+
+#define MTK_PPE_IP_PROTO_CHK			0x208
+#define MTK_PPE_IP_PROTO_CHK_IPV4		GENMASK(15, 0)
+#define MTK_PPE_IP_PROTO_CHK_IPV6		GENMASK(31, 16)
+
+#define MTK_PPE_TB_CFG				0x21c
+#define MTK_PPE_TB_CFG_ENTRY_NUM		GENMASK(2, 0)
+#define MTK_PPE_TB_CFG_ENTRY_80B		BIT(3)
+#define MTK_PPE_TB_CFG_SEARCH_MISS		GENMASK(5, 4)
+#define MTK_PPE_TB_CFG_AGE_PREBIND		BIT(6)
+#define MTK_PPE_TB_CFG_AGE_NON_L4		BIT(7)
+#define MTK_PPE_TB_CFG_AGE_UNBIND		BIT(8)
+#define MTK_PPE_TB_CFG_AGE_TCP			BIT(9)
+#define MTK_PPE_TB_CFG_AGE_UDP			BIT(10)
+#define MTK_PPE_TB_CFG_AGE_TCP_FIN		BIT(11)
+#define MTK_PPE_TB_CFG_KEEPALIVE		GENMASK(13, 12)
+#define MTK_PPE_TB_CFG_HASH_MODE		GENMASK(15, 14)
+#define MTK_PPE_TB_CFG_SCAN_MODE		GENMASK(17, 16)
+#define MTK_PPE_TB_CFG_HASH_DEBUG		GENMASK(19, 18)
+
+enum {
+	MTK_PPE_SCAN_MODE_DISABLED,
+	MTK_PPE_SCAN_MODE_CHECK_AGE,
+	MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
+};
+
+enum {
+	MTK_PPE_KEEPALIVE_DISABLE,
+	MTK_PPE_KEEPALIVE_UNICAST_CPU,
+	MTK_PPE_KEEPALIVE_DUP_CPU = 3,
+};
+
+enum {
+	MTK_PPE_SEARCH_MISS_ACTION_DROP,
+	MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
+	MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
+};
+
+#define MTK_PPE_TB_BASE				0x220
+
+#define MTK_PPE_TB_USED				0x224
+#define MTK_PPE_TB_USED_NUM			GENMASK(13, 0)
+
+#define MTK_PPE_BIND_RATE			0x228
+#define MTK_PPE_BIND_RATE_BIND			GENMASK(15, 0)
+#define MTK_PPE_BIND_RATE_PREBIND		GENMASK(31, 16)
+
+#define MTK_PPE_BIND_LIMIT0			0x22c
+#define MTK_PPE_BIND_LIMIT0_QUARTER		GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT0_HALF		GENMASK(29, 16)
+
+#define MTK_PPE_BIND_LIMIT1			0x230
+#define MTK_PPE_BIND_LIMIT1_FULL		GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT1_NON_L4		GENMASK(23, 16)
+
+#define MTK_PPE_KEEPALIVE			0x234
+#define MTK_PPE_KEEPALIVE_TIME			GENMASK(15, 0)
+#define MTK_PPE_KEEPALIVE_TIME_TCP		GENMASK(23, 16)
+#define MTK_PPE_KEEPALIVE_TIME_UDP		GENMASK(31, 24)
+
+#define MTK_PPE_UNBIND_AGE			0x238
+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS		GENMASK(31, 16)
+#define MTK_PPE_UNBIND_AGE_DELTA		GENMASK(7, 0)
+
+#define MTK_PPE_BIND_AGE0			0x23c
+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4		GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE0_DELTA_UDP		GENMASK(14, 0)
+
+#define MTK_PPE_BIND_AGE1			0x240
+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN		GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE1_DELTA_TCP		GENMASK(14, 0)
+
+#define MTK_PPE_HASH_SEED			0x244
+
+#define MTK_PPE_DEFAULT_CPU_PORT		0x248
+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
+
+#define MTK_PPE_MTU_DROP			0x308
+
+#define MTK_PPE_VLAN_MTU0			0x30c
+#define MTK_PPE_VLAN_MTU0_NONE			GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU0_1TAG			GENMASK(29, 16)
+
+#define MTK_PPE_VLAN_MTU1			0x310
+#define MTK_PPE_VLAN_MTU1_2TAG			GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU1_3TAG			GENMASK(29, 16)
+
+#define MTK_PPE_VPM_TPID			0x318
+
+#define MTK_PPE_CACHE_CTL			0x320
+#define MTK_PPE_CACHE_CTL_EN			BIT(0)
+#define MTK_PPE_CACHE_CTL_LOCK_CLR		BIT(4)
+#define MTK_PPE_CACHE_CTL_REQ			BIT(8)
+#define MTK_PPE_CACHE_CTL_CLEAR			BIT(9)
+#define MTK_PPE_CACHE_CTL_CMD			GENMASK(13, 12)
+
+#define MTK_PPE_MIB_CFG				0x334
+#define MTK_PPE_MIB_CFG_EN			BIT(0)
+#define MTK_PPE_MIB_CFG_RD_CLR			BIT(1)
+
+#define MTK_PPE_MIB_TB_BASE			0x338
+
+#define MTK_PPE_MIB_CACHE_CTL			0x350
+#define MTK_PPE_MIB_CACHE_CTL_EN		BIT(0)
+#define MTK_PPE_MIB_CACHE_CTL_FLUSH		BIT(2)
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index e8cecd50558d..9d79c5ec31e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -263,15 +263,15 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
 	return 0;
 }
 
-static void dump_buf(void *buf, int size, int data_only, int offset)
+static void dump_buf(void *buf, int size, int data_only, int offset, int idx)
 {
 	__be32 *p = buf;
 	int i;
 
 	for (i = 0; i < size; i += 16) {
-		pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
-			 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
-			 be32_to_cpu(p[3]));
+		pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset,
+			 be32_to_cpu(p[0]), be32_to_cpu(p[1]),
+			 be32_to_cpu(p[2]), be32_to_cpu(p[3]));
 		p += 4;
 		offset += 16;
 	}
@@ -802,39 +802,41 @@ static void dump_command(struct mlx5_core_dev *dev,
 	int dump_len;
 	int i;
 
+	mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx);
 	data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
 
 	if (data_only)
 		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
-				   "dump command data %s(0x%x) %s\n",
-				   mlx5_command_str(op), op,
+				   "cmd[%d]: dump command data %s(0x%x) %s\n",
+				   ent->idx, mlx5_command_str(op), op,
 				   input ? "INPUT" : "OUTPUT");
 	else
-		mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
-			      mlx5_command_str(op), op,
+		mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n",
+			      ent->idx, mlx5_command_str(op), op,
 			      input ? "INPUT" : "OUTPUT");
 
 	if (data_only) {
 		if (input) {
-			dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+			dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx);
 			offset += sizeof(ent->lay->in);
 		} else {
-			dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+			dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx);
 			offset += sizeof(ent->lay->out);
 		}
 	} else {
-		dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+		dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx);
 		offset += sizeof(*ent->lay);
 	}
 
 	for (i = 0; i < n && next; i++)  {
 		if (data_only) {
 			dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
-			dump_buf(next->buf, dump_len, 1, offset);
+			dump_buf(next->buf, dump_len, 1, offset, ent->idx);
 			offset += MLX5_CMD_DATA_BLOCK_SIZE;
 		} else {
-			mlx5_core_dbg(dev, "command block:\n");
-			dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+			mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx);
+			dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset,
+				 ent->idx);
 			offset += sizeof(struct mlx5_cmd_prot_block);
 		}
 		next = next->next;
@@ -842,6 +844,8 @@ static void dump_command(struct mlx5_core_dev *dev,
 
 	if (data_only)
 		pr_debug("\n");
+
+	mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx);
 }
 
 static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index b051417ede67..4def64d0e669 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -58,9 +58,6 @@ static bool is_eth_supported(struct mlx5_core_dev *dev)
 	if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
 		return false;
 
-	if (is_eth_rep_supported(dev))
-		return false;
-
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 		return false;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index d7d8a68ef23d..38c7c44fe883 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -137,18 +137,18 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 		 * unregistering devlink instance while holding devlink_mutext.
 		 * Hence, do not support reload.
 		 */
-		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated\n");
+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
 		return -EOPNOTSUPP;
 	}
 
 	if (mlx5_lag_is_active(dev)) {
-		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n");
+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
 		return -EOPNOTSUPP;
 	}
 
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		mlx5_unload_one(dev, false);
+		mlx5_unload_one(dev);
 		return 0;
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
@@ -170,13 +170,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
 	*actions_performed = BIT(action);
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		return mlx5_load_one(dev, false);
+		return mlx5_load_one(dev);
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
 			break;
 		/* On fw_activate action, also driver is reloaded and reinit performed */
 		*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-		return mlx5_load_one(dev, false);
+		return mlx5_load_one(dev);
 	default:
 		/* Unsupported action should not get to this function */
 		WARN_ON(1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 2eb022ad7fd0..01a1d02dcf15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -1100,7 +1100,7 @@ int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
 	int err;
 
 	if (IS_ERR_OR_NULL(tracer))
-		return -EINVAL;
+		return 0;
 
 	dev = tracer->dev;
 	mlx5_fw_tracer_cleanup(tracer);
@@ -1126,8 +1126,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void
 
 	switch (eqe->sub_type) {
 	case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
-		if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
-			queue_work(tracer->work_queue, &tracer->ownership_change_work);
+		queue_work(tracer->work_queue, &tracer->ownership_change_work);
 		break;
 	case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
 		if (likely(tracer->str_db.loaded))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 304b296fe8b9..9ea3f3befe74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -881,7 +881,6 @@ struct mlx5e_priv {
 #endif
 	struct devlink_health_reporter *tx_reporter;
 	struct devlink_health_reporter *rx_reporter;
-	struct devlink_port            dl_port;
 	struct mlx5e_xsk           xsk;
 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
 	struct mlx5e_hv_vhca_stats_agent stats_agent;
@@ -1175,6 +1174,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
 int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 				const struct mlx5e_profile *new_profile, void *new_ppriv);
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
 void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index a69c62d72d16..765f3064689d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -2,37 +2,65 @@
 /* Copyright (c) 2020, Mellanox Technologies inc.  All rights reserved. */
 
 #include "en/devlink.h"
+#include "eswitch.h"
+
+static void
+mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+	u64 parent_id;
+
+	parent_id = mlx5_query_nic_system_image_guid(dev);
+	ppid->id_len = sizeof(parent_id);
+	memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
 
 int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 {
 	struct devlink *devlink = priv_to_devlink(priv->mdev);
 	struct devlink_port_attrs attrs = {};
+	struct netdev_phys_item_id ppid = {};
+	struct devlink_port *dl_port;
+	unsigned int dl_port_index;
 
 	if (mlx5_core_is_pf(priv->mdev)) {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
 		attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn);
+		if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
+			mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
+			memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+			attrs.switch_id.id_len = ppid.id_len;
+		}
+		dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev,
+								     MLX5_VPORT_UPLINK);
 	} else {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+		dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
 	}
 
-	devlink_port_attrs_set(&priv->dl_port, &attrs);
+	dl_port = mlx5e_devlink_get_dl_port(priv);
+	memset(dl_port, 0, sizeof(*dl_port));
+	devlink_port_attrs_set(dl_port, &attrs);
 
-	return devlink_port_register(devlink, &priv->dl_port, 1);
+	return devlink_port_register(devlink, dl_port, dl_port_index);
 }
 
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
 {
-	devlink_port_type_eth_set(&priv->dl_port, priv->netdev);
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+	devlink_port_type_eth_set(dl_port, priv->netdev);
 }
 
 void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
 {
-	devlink_port_unregister(&priv->dl_port);
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+	devlink_port_unregister(dl_port);
 }
 
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	return &priv->dl_port;
+	return mlx5e_devlink_get_dl_port(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
index 83123a801adc..10b50feb9883 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
@@ -12,4 +12,10 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv);
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv);
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev);
 
+static inline struct devlink_port *
+mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv)
+{
+	return &priv->mdev->mlx5e_res.dl_port;
+}
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index d57b6f06382f..bb5d108f75d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -174,7 +174,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
 	sq->mdev      = mdev;
 	sq->ch_ix     = c->ix;
 	sq->txq_ix    = txq_ix;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats     = &c->priv->port_ptp_stats.sq[tc];
@@ -475,7 +475,7 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->ix       = 0;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->stats    = &priv->port_ptp_stats.ch;
 	c->lag_port = lag_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 065126370acd..11a44d30adc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -169,6 +169,9 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
 	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
 	struct mlx5e_priv *priv = cb_priv;
 
+	if (!priv->netdev || !netif_device_present(priv->netdev))
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
@@ -321,6 +324,9 @@ mlx5e_rep_indr_offload(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
 	int err = 0;
 
+	if (!netif_device_present(indr_priv->rpriv->netdev))
+		return -EOPNOTSUPP;
+
 	switch (flower->command) {
 	case FLOW_CLS_REPLACE:
 		err = mlx5e_configure_flower(netdev, priv, flower, flags);
@@ -621,11 +627,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	int err;
 
 	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
-	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
-		reg_c0 = 0;
-	reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
-	if (!reg_c0)
+	if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
 		return true;
 
 	/* If reg_c0 is not equal to the default flow tag then skb->mark
@@ -633,6 +635,8 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	 */
 	skb->mark = 0;
 
+	reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index d80bbd17e5f8..34b3b316b688 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -4,6 +4,7 @@
 #include "health.h"
 #include "params.h"
 #include "txrx.h"
+#include "devlink.h"
 
 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 {
@@ -615,9 +616,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
 
 void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
 {
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 	struct devlink_health_reporter *reporter;
 
-	reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_rx_reporter_ops,
+	reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops,
 						       MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
 	if (IS_ERR(reporter)) {
 		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
@@ -633,4 +635,5 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
 		return;
 
 	devlink_port_health_reporter_destroy(priv->rx_reporter);
+	priv->rx_reporter = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index d7275c84313e..63ee3b9416de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -3,6 +3,7 @@
 
 #include "health.h"
 #include "en/ptp.h"
+#include "en/devlink.h"
 
 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
 {
@@ -572,9 +573,10 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
 
 void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
 {
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 	struct devlink_health_reporter *reporter;
 
-	reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops,
+	reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
 	if (IS_ERR(reporter)) {
 		netdev_warn(priv->netdev,
@@ -591,4 +593,5 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
 		return;
 
 	devlink_port_health_reporter_destroy(priv->tx_reporter);
+	priv->tx_reporter = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index b2cd29847a37..df13e5094034 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -695,7 +695,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 
 	zone_rule->nat = nat;
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return -ENOMEM;
 
@@ -737,7 +737,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 
 	zone_rule->attr = attr;
 
-	kfree(spec);
+	kvfree(spec);
 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
 
 	return 0;
@@ -749,7 +749,7 @@ err_rule:
 err_mod_hdr:
 	kfree(attr);
 err_attr:
-	kfree(spec);
+	kvfree(spec);
 	return err;
 }
 
@@ -1540,6 +1540,14 @@ mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
 }
 
+/* To avoid false lock dependency warning set the ct_entries_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
 static struct mlx5_ct_ft *
 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 		     struct nf_flowtable *nf_ft)
@@ -1574,6 +1582,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 	if (err)
 		goto err_init;
 
+	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
 				     zone_params);
 	if (err)
@@ -1675,10 +1685,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 	struct mlx5_ct_ft *ft;
 	u32 fte_id = 1;
 
-	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
+	post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
 	if (!post_ct_spec || !ct_flow) {
-		kfree(post_ct_spec);
+		kvfree(post_ct_spec);
 		kfree(ct_flow);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1788,6 +1798,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 	ct_flow->post_ct_attr->prio = 0;
 	ct_flow->post_ct_attr->ft = ct_priv->post_ct;
 
+	/* Splits were handled before CT */
+	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+		ct_flow->post_ct_attr->esw_attr->split_count = 0;
+
 	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
 	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
 	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
@@ -1813,7 +1827,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 
 	attr->ct_attr.ct_flow = ct_flow;
 	dealloc_mod_hdr_actions(&pre_mod_acts);
-	kfree(post_ct_spec);
+	kvfree(post_ct_spec);
 
 	return rule;
 
@@ -1834,7 +1848,7 @@ err_alloc_pre:
 err_idr:
 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
 err_ft:
-	kfree(post_ct_spec);
+	kvfree(post_ct_spec);
 	kfree(ct_flow);
 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
 	return ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 67de2bf36861..89d5ca91566e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -76,10 +76,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 static inline int
 mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 				struct net_device *mirred_dev,
-				struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
-int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
-				    struct net_device *mirred_dev,
-				    struct mlx5e_encap_entry *e)
+				struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+static inline int
+mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+				struct net_device *mirred_dev,
+				struct mlx5e_encap_entry *e)
 { return -EOPNOTSUPP; }
 #endif
 int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 7f7b0f6dcdf9..01d435e15ad3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021 Mellanox Technologies. */
 
 #include <net/fib_notifier.h>
+#include <net/nexthop.h>
 #include "tc_tun_encap.h"
 #include "en_tc.h"
 #include "tc_tun.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 37fc1d77ded7..41db93883fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -84,7 +84,7 @@ static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *r
 	if (err)
 		goto err_free_frags;
 
-	rq->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
 
 	mlx5e_rq_set_trap_handlers(rq, params);
 
@@ -213,7 +213,7 @@ static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct ml
 		return -ENOMEM;
 
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
 	MLX5_SET(tirc, tirc, inline_rqn, rqn);
@@ -266,7 +266,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
 	t->tstamp   = &priv->tstamp;
 	t->pdev     = mlx5_core_dma_dev(priv->mdev);
 	t->netdev   = priv->netdev;
-	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	t->stats    = &priv->trap_stats.ch;
 
 	netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 381a9c8c9da9..34119ce92031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -60,7 +60,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return -ENOMEM;
 
@@ -101,7 +101,7 @@ out:
 	if (err)
 		mlx5_modify_header_dealloc(mdev, modify_hdr);
 out_spec:
-	kfree(spec);
+	kvfree(spec);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index d06532d0baa4..f7c880edae37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -84,7 +84,7 @@ static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn
 
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 
-	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
 	MLX5_SET(tirc, tirc, indirect_table, rqtn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index a6cf008057b5..8c166ee56d8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -38,15 +38,16 @@
 
 int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
 {
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 	int err;
 
 	err = mlx5_core_create_tir(mdev, in, &tir->tirn);
 	if (err)
 		return err;
 
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
-	list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_lock(&res->td.list_lock);
+	list_add(&tir->list, &res->td.tirs_list);
+	mutex_unlock(&res->td.list_lock);
 
 	return 0;
 }
@@ -54,10 +55,12 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir)
 {
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
+
+	mutex_lock(&res->td.list_lock);
 	mlx5_core_destroy_tir(mdev, tir->tirn);
 	list_del(&tir->list);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_unlock(&res->td.list_lock);
 }
 
 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
@@ -99,7 +102,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 {
-	struct mlx5e_resources *res = &mdev->mlx5e_res;
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 	int err;
 
 	err = mlx5_core_alloc_pd(mdev, &res->pdn);
@@ -126,8 +129,8 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 		goto err_destroy_mkey;
 	}
 
-	INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
-	mutex_init(&mdev->mlx5e_res.td.list_lock);
+	INIT_LIST_HEAD(&res->td.tirs_list);
+	mutex_init(&res->td.list_lock);
 
 	return 0;
 
@@ -142,7 +145,7 @@ err_dealloc_pd:
 
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 {
-	struct mlx5e_resources *res = &mdev->mlx5e_res;
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 
 	mlx5_free_bfreg(mdev, &res->bfreg);
 	mlx5_core_destroy_mkey(mdev, &res->mkey);
@@ -180,8 +183,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
-	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+	mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+	list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) {
 		tirn = tir->tirn;
 		err = mlx5_core_modify_tir(mdev, tirn, in);
 		if (err)
@@ -192,7 +195,7 @@ out:
 	kvfree(in);
 	if (err)
 		netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 16ce7756ac43..cf1d3c9c88af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,6 +36,7 @@
 #include <linux/tcp.h>
 #include <linux/mlx5/fs.h>
 #include "en.h"
+#include "en_rep.h"
 #include "lib/mpfs.h"
 
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
@@ -435,6 +436,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
+	if (mlx5e_is_uplink_rep(priv))
+		return 0; /* no vlan table for uplink rep */
+
 	if (be16_to_cpu(proto) == ETH_P_8021Q)
 		return mlx5e_vlan_rx_add_cvid(priv, vid);
 	else if (be16_to_cpu(proto) == ETH_P_8021AD)
@@ -447,6 +451,9 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
+	if (mlx5e_is_uplink_rep(priv))
+		return 0; /* no vlan table for uplink rep */
+
 	if (be16_to_cpu(proto) == ETH_P_8021Q) {
 		clear_bit(vid, priv->fs.vlan.active_cvlans);
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 158f947a8503..d40fc2672530 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -302,7 +302,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
 	mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
-	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
 	MLX5_SET64(mkc, mkc, len, npages << page_shift);
 	MLX5_SET(mkc, mkc, translations_octword_size,
 		 MLX5_MTT_OCTW(npages));
@@ -1019,7 +1019,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 	sq->pdev      = c->pdev;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->xsk_pool  = xsk_pool;
@@ -1090,7 +1090,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	int err;
 
 	sq->channel   = c;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1174,7 +1174,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->priv      = c->priv;
 	sq->ch_ix     = c->ix;
 	sq->txq_ix    = txq_ix;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@ -1257,7 +1257,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 	MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
-	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
+	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
 	MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
 					  MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
@@ -2032,7 +2032,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->cpu      = cpu;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
@@ -2217,7 +2217,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride,
 		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
-	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
+	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
@@ -2248,7 +2248,7 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
-	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
+	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.hw_objs.pdn);
 
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
 }
@@ -3424,10 +3424,10 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
 {
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 
 	if (MLX5_GET(tisc, tisc, tls_en))
-		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
 
 	if (mlx5_lag_is_lacp_owner(mdev))
 		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
@@ -3497,7 +3497,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
 					     u32 rqtn, u32 *tirc)
 {
-	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 	MLX5_SET(tirc, tirc, indirect_table, rqtn);
 	MLX5_SET(tirc, tirc, tunneled_offload_en,
@@ -3772,8 +3772,16 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	bool tc_unbind = false;
 	int err;
 
+	if (type == TC_SETUP_BLOCK &&
+	    ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
+		tc_unbind = true;
+
+	if (!netif_device_present(dev) && !tc_unbind)
+		return -ENODEV;
+
 	switch (type) {
 	case TC_SETUP_BLOCK: {
 		struct flow_block_offload *f = type_data;
@@ -3835,6 +3843,9 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
+	if (!netif_device_present(dev))
+		return;
+
 	/* In switchdev mode, monitor counters doesn't monitor
 	 * rx/tx stats of 802_3. The update stats mechanism
 	 * should keep the 802_3 layout counters updated
@@ -3876,11 +3887,19 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
 }
 
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
+{
+	if (mlx5e_is_uplink_rep(priv))
+		return; /* no rx mode for uplink rep */
+
+	queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
 static void mlx5e_set_rx_mode(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 }
 
 static int mlx5e_set_mac(struct net_device *netdev, void *addr)
@@ -3895,7 +3914,7 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
 	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
 	netif_addr_unlock_bh(netdev);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	return 0;
 }
@@ -4433,6 +4452,9 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	if (mlx5e_is_uplink_rep(priv))
+		return -EOPNOTSUPP;
+
 	return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
 					    mlx5_ifla_link2vport(link_state));
 }
@@ -4444,6 +4466,9 @@ int mlx5e_get_vf_config(struct net_device *dev,
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
+	if (!netif_device_present(dev))
+		return -EOPNOTSUPP;
+
 	err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
 	if (err)
 		return err;
@@ -4460,6 +4485,32 @@ int mlx5e_get_vf_stats(struct net_device *dev,
 	return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
 					    vf_stats);
 }
+
+static bool
+mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (!netif_device_present(dev))
+		return false;
+
+	if (!mlx5e_is_uplink_rep(priv))
+		return false;
+
+	return mlx5e_rep_has_offload_stats(dev, attr_id);
+}
+
+static int
+mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			void *sp)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (!mlx5e_is_uplink_rep(priv))
+		return -EOPNOTSUPP;
+
+	return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
+}
 #endif
 
 static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
@@ -4818,6 +4869,8 @@ const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
+	.ndo_has_offload_stats   = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats   = mlx5e_get_offload_stats,
 #endif
 	.ndo_get_devlink_port    = mlx5e_get_devlink_port,
 };
@@ -5279,10 +5332,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	if (err)
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
-	err = mlx5e_devlink_port_register(priv);
-	if (err)
-		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
-
 	mlx5e_health_create_reporters(priv);
 
 	return 0;
@@ -5291,7 +5340,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
 	mlx5e_health_destroy_reporters(priv);
-	mlx5e_devlink_port_unregister(priv);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 }
@@ -5431,7 +5479,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 		return;
 	mlx5e_dcbnl_init_app(priv);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	rtnl_lock();
 	if (netif_running(netdev))
@@ -5454,7 +5502,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 	netif_device_detach(priv->netdev);
 	rtnl_unlock();
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	mlx5e_hv_vhca_stats_destroy(priv);
 	if (mlx5e_monitor_counter_supported(priv))
@@ -5727,6 +5775,11 @@ rollback:
 	return err;
 }
 
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
+{
+	mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
+}
+
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
 {
 	struct net_device *netdev = priv->netdev;
@@ -5809,10 +5862,17 @@ static int mlx5e_probe(struct auxiliary_device *adev,
 
 	priv->profile = profile;
 	priv->ppriv = NULL;
+
+	err = mlx5e_devlink_port_register(priv);
+	if (err) {
+		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
+		goto err_destroy_netdev;
+	}
+
 	err = profile->init(mdev, netdev);
 	if (err) {
 		mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
-		goto err_destroy_netdev;
+		goto err_devlink_cleanup;
 	}
 
 	err = mlx5e_resume(adev);
@@ -5830,12 +5890,15 @@ static int mlx5e_probe(struct auxiliary_device *adev,
 	mlx5e_devlink_port_type_eth_set(priv);
 
 	mlx5e_dcbnl_init_app(priv);
+	mlx5_uplink_netdev_set(mdev, netdev);
 	return 0;
 
 err_resume:
 	mlx5e_suspend(adev, state);
 err_profile_cleanup:
 	profile->cleanup(priv);
+err_devlink_cleanup:
+	mlx5e_devlink_port_unregister(priv);
 err_destroy_netdev:
 	mlx5e_destroy_netdev(priv);
 	return err;
@@ -5850,6 +5913,7 @@ static void mlx5e_remove(struct auxiliary_device *adev)
 	unregister_netdev(priv->netdev);
 	mlx5e_suspend(adev, state);
 	priv->profile->cleanup(priv);
+	mlx5e_devlink_port_unregister(priv);
 	mlx5e_destroy_netdev(priv);
 }
 
@@ -5875,18 +5939,18 @@ int mlx5e_init(void)
 
 	mlx5e_ipsec_build_inverse_table();
 	mlx5e_build_ptys2ethtool_map();
-	ret = mlx5e_rep_init();
+	ret = auxiliary_driver_register(&mlx5e_driver);
 	if (ret)
 		return ret;
 
-	ret = auxiliary_driver_register(&mlx5e_driver);
+	ret = mlx5e_rep_init();
 	if (ret)
-		mlx5e_rep_cleanup();
+		auxiliary_driver_unregister(&mlx5e_driver);
 	return ret;
 }
 
 void mlx5e_cleanup(void)
 {
-	auxiliary_driver_unregister(&mlx5e_driver);
 	mlx5e_rep_cleanup();
+	auxiliary_driver_unregister(&mlx5e_driver);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a132fff7a980..4cc902e0d71b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -44,6 +44,7 @@
 #include "en_tc.h"
 #include "en/rep/tc.h"
 #include "en/rep/neigh.h"
+#include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
 #define CREATE_TRACE_POINTS
@@ -69,16 +70,6 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 		 fw_rev_sub(mdev), mdev->board_id);
 }
 
-static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
-					 struct ethtool_drvinfo *drvinfo)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	mlx5e_rep_get_drvinfo(dev, drvinfo);
-	strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
-		sizeof(drvinfo->bus_info));
-}
-
 static const struct counter_desc sw_rep_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
@@ -285,46 +276,6 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
 	return mlx5e_ethtool_get_rxfh_indir_size(priv);
 }
 
-static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev,
-					     struct ethtool_pause_stats *stats)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	mlx5e_stats_pause_get(priv, stats);
-}
-
-static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
-					    struct ethtool_pauseparam *pauseparam)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	mlx5e_ethtool_get_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
-					   struct ethtool_pauseparam *pauseparam)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
-					       struct ethtool_link_ksettings *link_ksettings)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
-}
-
-static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
-					       const struct ethtool_link_ksettings *link_ksettings)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
-}
-
 static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES |
@@ -344,34 +295,6 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
 };
 
-static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-				     ETHTOOL_COALESCE_MAX_FRAMES |
-				     ETHTOOL_COALESCE_USE_ADAPTIVE,
-	.get_drvinfo	   = mlx5e_uplink_rep_get_drvinfo,
-	.get_link	   = ethtool_op_get_link,
-	.get_strings       = mlx5e_rep_get_strings,
-	.get_sset_count    = mlx5e_rep_get_sset_count,
-	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
-	.get_ringparam     = mlx5e_rep_get_ringparam,
-	.set_ringparam     = mlx5e_rep_set_ringparam,
-	.get_channels      = mlx5e_rep_get_channels,
-	.set_channels      = mlx5e_rep_set_channels,
-	.get_coalesce      = mlx5e_rep_get_coalesce,
-	.set_coalesce      = mlx5e_rep_set_coalesce,
-	.get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
-	.set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
-	.get_rxfh_key_size   = mlx5e_rep_get_rxfh_key_size,
-	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
-	.get_rxfh          = mlx5e_get_rxfh,
-	.set_rxfh          = mlx5e_set_rxfh,
-	.get_rxnfc         = mlx5e_get_rxnfc,
-	.set_rxnfc         = mlx5e_set_rxnfc,
-	.get_pause_stats   = mlx5e_uplink_rep_get_pause_stats,
-	.get_pauseparam    = mlx5e_uplink_rep_get_pauseparam,
-	.set_pauseparam    = mlx5e_uplink_rep_set_pauseparam,
-};
-
 static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep)
 {
@@ -522,7 +445,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 	return (rep->vport == MLX5_VPORT_UPLINK);
 }
 
-static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -542,8 +465,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
 	return 0;
 }
 
-static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
-				       void *sp)
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+				void *sp)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -568,34 +491,6 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
 	return mlx5e_change_mtu(netdev, new_mtu, NULL);
 }
 
-static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
-}
-
-static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
-{
-	struct sockaddr *saddr = addr;
-
-	if (!is_valid_ether_addr(saddr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
-	return 0;
-}
-
-static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
-					__be16 vlan_proto)
-{
-	netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
-
-	if (vlan != 0)
-		return -EOPNOTSUPP;
-
-	/* allow setting 0-vid for compatibility with libvirt */
-	return 0;
-}
-
 static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -641,29 +536,10 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_change_carrier      = mlx5e_rep_change_carrier,
 };
 
-static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
-	.ndo_open                = mlx5e_open,
-	.ndo_stop                = mlx5e_close,
-	.ndo_start_xmit          = mlx5e_xmit,
-	.ndo_set_mac_address     = mlx5e_uplink_rep_set_mac,
-	.ndo_setup_tc            = mlx5e_rep_setup_tc,
-	.ndo_get_devlink_port    = mlx5e_rep_get_devlink_port,
-	.ndo_get_stats64         = mlx5e_get_stats,
-	.ndo_has_offload_stats	 = mlx5e_rep_has_offload_stats,
-	.ndo_get_offload_stats	 = mlx5e_rep_get_offload_stats,
-	.ndo_change_mtu          = mlx5e_uplink_rep_change_mtu,
-	.ndo_features_check      = mlx5e_features_check,
-	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
-	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
-	.ndo_get_vf_config       = mlx5e_get_vf_config,
-	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
-	.ndo_set_vf_vlan         = mlx5e_uplink_rep_set_vf_vlan,
-	.ndo_set_features        = mlx5e_set_features,
-};
-
 bool mlx5e_eswitch_uplink_rep(struct net_device *netdev)
 {
-	return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep;
+	return netdev->netdev_ops == &mlx5e_netdev_ops &&
+	       mlx5e_is_uplink_rep(netdev_priv(netdev));
 }
 
 bool mlx5e_eswitch_vf_rep(struct net_device *netdev)
@@ -713,26 +589,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev,
-				   struct mlx5_core_dev *mdev,
-				   struct mlx5_eswitch_rep *rep)
+				   struct mlx5_core_dev *mdev)
 {
 	SET_NETDEV_DEV(netdev, mdev->device);
-	if (rep->vport == MLX5_VPORT_UPLINK) {
-		netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
-		/* we want a persistent mac for the uplink rep */
-		mlx5_query_mac_address(mdev, netdev->dev_addr);
-		netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
-		mlx5e_dcbnl_build_rep_netdev(netdev);
-	} else {
-		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
-		eth_hw_addr_random(netdev);
-		netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
-	}
+	netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+	eth_hw_addr_random(netdev);
+	netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
 
 	netdev->watchdog_timeo    = 15 * HZ;
 
-	netdev->features       |= NETIF_F_NETNS_LOCAL;
-
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 	netdev->hw_features    |= NETIF_F_HW_TC;
 #endif
@@ -744,12 +609,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev,
 	netdev->hw_features    |= NETIF_F_TSO6;
 	netdev->hw_features    |= NETIF_F_RXCSUM;
 
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-	else
-		netdev->features |= NETIF_F_VLAN_CHALLENGED;
-
 	netdev->features |= netdev->hw_features;
+	netdev->features |= NETIF_F_VLAN_CHALLENGED;
+	netdev->features |= NETIF_F_NETNS_LOCAL;
 }
 
 static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@ -1115,6 +977,14 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_initialize(priv);
 	mlx5e_dcbnl_init_app(priv);
 	mlx5e_rep_neigh_init(rpriv);
+
+	netdev->wanted_features |= NETIF_F_HW_TC;
+
+	rtnl_lock();
+	if (netif_running(netdev))
+		mlx5e_open(netdev);
+	netif_device_attach(netdev);
+	rtnl_unlock();
 }
 
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
@@ -1122,6 +992,12 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	rtnl_lock();
+	if (netif_running(priv->netdev))
+		mlx5e_close(priv->netdev);
+	netif_device_detach(priv->netdev);
+	rtnl_unlock();
+
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
@@ -1198,33 +1074,64 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
 	.update_carrier	        = mlx5e_update_carrier,
 	.rx_handlers            = &mlx5e_rx_handlers_rep,
 	.max_tc			= MLX5E_MAX_NUM_TC,
-	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
+	/* XSK is needed so we can replace profile with NIC netdev */
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(XSK),
 	.stats_grps		= mlx5e_ul_rep_stats_grps,
 	.stats_grps_num		= mlx5e_ul_rep_stats_grps_num,
 };
 
 /* e-Switch vport representors */
 static int
-mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+	struct devlink_port *dl_port;
+	int err;
+
+	rpriv->netdev = priv->netdev;
+
+	err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
+					  rpriv);
+	if (err)
+		return err;
+
+	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+	if (dl_port)
+		devlink_port_type_eth_set(dl_port, rpriv->netdev);
+
+	return 0;
+}
+
+static void
+mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv)
+{
+	struct net_device *netdev = rpriv->netdev;
+	struct devlink_port *dl_port;
+	struct mlx5_core_dev *dev;
+	struct mlx5e_priv *priv;
+
+	priv = netdev_priv(netdev);
+	dev = priv->mdev;
+
+	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+	if (dl_port)
+		devlink_port_type_clear(dl_port);
+	mlx5e_netdev_attach_nic_profile(priv);
+}
+
+static int
+mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
 	const struct mlx5e_profile *profile;
-	struct mlx5e_rep_priv *rpriv;
 	struct devlink_port *dl_port;
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
 	unsigned int txqs, rxqs;
 	int nch, err;
 
-	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
-	if (!rpriv)
-		return -ENOMEM;
-
-	/* rpriv->rep to be looked up when profile->init() is called */
-	rpriv->rep = rep;
-
-	profile = (rep->vport == MLX5_VPORT_UPLINK) ?
-		  &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
-
+	profile = &mlx5e_rep_profile;
 	nch = mlx5e_get_max_num_channels(dev);
 	txqs = nch * profile->max_tc;
 	rxqs = nch * profile->rq_groups;
@@ -1233,21 +1140,11 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		mlx5_core_warn(dev,
 			       "Failed to create representor netdev for vport %d\n",
 			       rep->vport);
-		kfree(rpriv);
 		return -EINVAL;
 	}
 
-	mlx5e_build_rep_netdev(netdev, dev, rep);
-
+	mlx5e_build_rep_netdev(netdev, dev);
 	rpriv->netdev = netdev;
-	rep->rep_data[REP_ETH].priv = rpriv;
-	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
-
-	if (rep->vport == MLX5_VPORT_UPLINK) {
-		err = mlx5e_create_mdev_resources(dev);
-		if (err)
-			goto err_destroy_netdev;
-	}
 
 	priv = netdev_priv(netdev);
 	priv->profile = profile;
@@ -1255,7 +1152,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	err = profile->init(dev, netdev);
 	if (err) {
 		netdev_warn(netdev, "rep profile init failed, %d\n", err);
-		goto err_destroy_mdev_resources;
+		goto err_destroy_netdev;
 	}
 
 	err = mlx5e_attach_netdev(netdev_priv(netdev));
@@ -1285,13 +1182,34 @@ err_detach_netdev:
 err_cleanup_profile:
 	priv->profile->cleanup(priv);
 
-err_destroy_mdev_resources:
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		mlx5e_destroy_mdev_resources(dev);
-
 err_destroy_netdev:
 	mlx5e_destroy_netdev(netdev_priv(netdev));
-	kfree(rpriv);
+	return err;
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_rep_priv *rpriv;
+	int err;
+
+	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+	if (!rpriv)
+		return -ENOMEM;
+
+	/* rpriv->rep to be looked up when profile->init() is called */
+	rpriv->rep = rep;
+	rep->rep_data[REP_ETH].priv = rpriv;
+	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		err = mlx5e_vport_uplink_rep_load(dev, rep);
+	else
+		err = mlx5e_vport_vf_rep_load(dev, rep);
+
+	if (err)
+		kfree(rpriv);
+
 	return err;
 }
 
@@ -1305,15 +1223,19 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	struct devlink_port *dl_port;
 	void *ppriv = priv->ppriv;
 
+	if (rep->vport == MLX5_VPORT_UPLINK) {
+		mlx5e_vport_uplink_rep_unload(rpriv);
+		goto free_ppriv;
+	}
+
 	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
 	if (dl_port)
 		devlink_port_type_clear(dl_port);
 	unregister_netdev(netdev);
 	mlx5e_detach_netdev(priv);
 	priv->profile->cleanup(priv);
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		mlx5e_destroy_mdev_resources(priv->mdev);
 	mlx5e_destroy_netdev(priv);
+free_ppriv:
 	kfree(ppriv); /* mlx5e_rep_priv */
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index d1696404cca9..931fa619cb01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -220,6 +220,10 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 			    const struct net_device *lag_dev);
 int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
 
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id);
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+				void *sp);
+
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
@@ -240,6 +244,11 @@ static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
 static inline int mlx5e_rep_init(void) { return 0; };
 static inline void mlx5e_rep_cleanup(void) {};
+static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
+					       int attr_id) { return false; }
+static inline int mlx5e_rep_get_offload_stats(int attr_id,
+					      const struct net_device *dev,
+					      void *sp) { return -EOPNOTSUPP; }
 #endif
 
 static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 249d8905e644..f90894eea9e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -52,6 +52,7 @@
 #include "en/health.h"
 #include "en/params.h"
 #include "devlink.h"
+#include "en/devlink.h"
 
 static struct sk_buff *
 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
@@ -669,6 +670,7 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 						 get_cqe_opcode(cqe));
 				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
 						     (struct mlx5_err_cqe *)cqe);
+				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
 					queue_work(cq->priv->wq, &sq->recover_work);
 				break;
@@ -1822,6 +1824,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 	struct mlx5e_priv *priv = netdev_priv(rq->netdev);
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
+	struct devlink_port *dl_port;
 	struct sk_buff *skb;
 	u32 cqe_bcnt;
 	u16 trap_id;
@@ -1844,7 +1847,8 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	skb_push(skb, ETH_HLEN);
 
-	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port);
+	dl_port = mlx5e_devlink_get_dl_port(priv);
+	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
 	dev_kfree_skb_any(skb);
 
 free_wqe:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index df2a0af854bb..4bd882a1018c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -445,12 +445,16 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
 }
 
-static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
+static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
 {
-	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
+	u32 *indirection_rqt, rqn;
 	struct mlx5e_priv *priv = hp->func_priv;
 	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
 
+	indirection_rqt = kzalloc(sz, GFP_KERNEL);
+	if (!indirection_rqt)
+		return -ENOMEM;
+
 	mlx5e_build_default_indir_rqt(indirection_rqt, sz,
 				      hp->num_channels);
 
@@ -462,6 +466,9 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
 		rqn = hp->pair->rqn[ix];
 		MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
 	}
+
+	kfree(indirection_rqt);
+	return 0;
 }
 
 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
@@ -482,12 +489,15 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 
-	mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+	err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+	if (err)
+		goto out;
 
 	err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
 	if (!err)
 		hp->indir_rqt.enabled = true;
 
+out:
 	kvfree(in);
 	return err;
 }
@@ -1077,19 +1087,23 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 	if (flow_flag_test(flow, CT)) {
 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
 
-		return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
+		rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
 					       flow, spec, attr,
 					       mod_hdr_acts);
+	} else {
+		rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 	}
 
-	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 	if (IS_ERR(rule))
 		return rule;
 
 	if (attr->esw_attr->split_count) {
 		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
 		if (IS_ERR(flow->rule[1])) {
-			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+			if (flow_flag_test(flow, CT))
+				mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+			else
+				mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
 			return flow->rule[1];
 		}
 	}
@@ -1947,6 +1961,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    misc_parameters);
+	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters_3);
+	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters_3);
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 	struct flow_dissector *dissector = rule->match.dissector;
 	u16 addr_type = 0;
@@ -1976,6 +1994,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 	      BIT(FLOW_DISSECTOR_KEY_CT) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
 	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
 		netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
@@ -2295,7 +2314,49 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		if (match.mask->flags)
 			*match_level = MLX5_MATCH_L4;
 	}
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
 
+		flow_rule_match_icmp(rule, &match);
+		switch (ip_proto) {
+		case IPPROTO_ICMP:
+			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+			      MLX5_FLEX_PROTO_ICMP))
+				return -EOPNOTSUPP;
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
+				 match.mask->type);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
+				 match.key->type);
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
+				 match.mask->code);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
+				 match.key->code);
+			break;
+		case IPPROTO_ICMPV6:
+			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+			      MLX5_FLEX_PROTO_ICMPV6))
+				return -EOPNOTSUPP;
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
+				 match.mask->type);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
+				 match.key->type);
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
+				 match.mask->code);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
+				 match.key->code);
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Code and type matching only with ICMP and ICMPv6");
+			netdev_err(priv->netdev,
+				   "Code and type matching only with ICMP and ICMPv6\n");
+			return -EINVAL;
+		}
+		if (match.mask->code || match.mask->type) {
+			*match_level = MLX5_MATCH_L4;
+			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+		}
+	}
 	/* Currenlty supported only for MPLS over UDP */
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
 	    !netif_is_bareudp(filter_dev)) {
@@ -3011,7 +3072,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 	actions = flow->attr->action;
 
 	if (mlx5e_is_eswitch_flow(flow)) {
-		if (flow->attr->esw_attr->split_count && ct_flow) {
+		if (flow->attr->esw_attr->split_count && ct_flow &&
+		    !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
 			/* All registers used by ct are cleared when using
 			 * split rules.
 			 */
@@ -3811,6 +3873,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				return err;
 
 			flow_flag_set(flow, CT);
+			esw_attr->split_count = esw_attr->out_count;
 			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
@@ -3873,11 +3936,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			return -EOPNOTSUPP;
 		}
 
-		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-			NL_SET_ERR_MSG_MOD(extack,
-					   "Mirroring goto chain rules isn't supported");
-			return -EOPNOTSUPP;
-		}
 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
@@ -4297,6 +4355,11 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 	int err = 0;
 
+	if (!mlx5_esw_hold(priv->mdev))
+		return -EAGAIN;
+
+	mlx5_esw_get(priv->mdev);
+
 	rcu_read_lock();
 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 	if (flow) {
@@ -4334,11 +4397,14 @@ rcu_unlock:
 	if (err)
 		goto err_free;
 
+	mlx5_esw_release(priv->mdev);
 	return 0;
 
 err_free:
 	mlx5e_flow_put(priv, flow);
 out:
+	mlx5_esw_put(priv->mdev);
+	mlx5_esw_release(priv->mdev);
 	return err;
 }
 
@@ -4378,6 +4444,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	trace_mlx5e_delete_flower(f);
 	mlx5e_flow_put(priv, flow);
 
+	mlx5_esw_put(priv->mdev);
 	return 0;
 
 errout:
@@ -4513,6 +4580,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 	flow_action_for_each(i, act, flow_action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
 			if (err)
 				return err;
@@ -4679,10 +4750,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
 				 MLX5_FLOW_NAMESPACE_KERNEL);
-	if (IS_ERR(tc->ct)) {
-		err = PTR_ERR(tc->ct);
-		goto err_ct;
-	}
 
 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
 	err = register_netdevice_notifier_dev_net(priv->netdev,
@@ -4698,7 +4765,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 err_reg:
 	mlx5_tc_ct_clean(tc->ct);
-err_ct:
 	mlx5_chains_destroy(tc->chains);
 err_chains:
 	rhashtable_destroy(&tc->ht);
@@ -4757,8 +4823,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 					       esw_chains(esw),
 					       &esw->offloads.mod_hdr,
 					       MLX5_FLOW_NAMESPACE_FDB);
-	if (IS_ERR(uplink_priv->ct_priv))
-		goto err_ct;
 
 	mapping = mapping_create(sizeof(struct tunnel_match_key),
 				 TUNNEL_INFO_BITS_MASK, true);
@@ -4798,7 +4862,6 @@ err_enc_opts_mapping:
 	mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
-err_ct:
 	netdev_warn(priv->netdev,
 		    "Failed to initialize tc (eswitch), err: %d", err);
 	return err;
@@ -4871,9 +4934,17 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 			    void *cb_priv)
 {
-	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
+	unsigned long flags = MLX5_TC_FLAG(INGRESS);
 	struct mlx5e_priv *priv = cb_priv;
 
+	if (!priv->netdev || !netif_device_present(priv->netdev))
+		return -EOPNOTSUPP;
+
+	if (mlx5e_is_uplink_rep(priv))
+		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
+	else
+		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
+
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index bdbffe484fce..d2efe2455955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -576,7 +576,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
 
 	pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
-	prefetchw(wqe->data);
+	net_prefetchw(wqe->data);
 
 	*session = (struct mlx5e_tx_mpwqe) {
 		.wqe = wqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index cb1e181f4c6a..7bfc84238b3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -120,7 +120,7 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
-	return vport->dl_port;
+	return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port;
 }
 
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 6f6772bf61a2..3da7becc1069 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -248,7 +248,7 @@ err_mod_hdr_regc0:
 err_ethertype:
 	kfree(rule);
 out:
-	kfree(rule_spec);
+	kvfree(rule_spec);
 	return err;
 }
 
@@ -328,7 +328,7 @@ static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
 	e->recirc_cnt = 0;
 
 out:
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 
@@ -347,7 +347,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		kfree(in);
+		kvfree(in);
 		return -ENOMEM;
 	}
 
@@ -371,8 +371,8 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
 	}
 
 err_out:
-	kfree(spec);
-	kfree(in);
+	kvfree(spec);
+	kvfree(in);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
index cb9eafd1b4ee..21d56b49d14b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
@@ -30,13 +30,13 @@ mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr);
 
 #else
 /* indir API stubs */
-struct mlx5_esw_indir_table *
+static inline struct mlx5_esw_indir_table *
 mlx5_esw_indir_table_init(void)
 {
 	return NULL;
 }
 
-void
+static inline void
 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
 {
 }
@@ -57,7 +57,7 @@ mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
 {
 }
 
-bool
+static inline bool
 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
 			    struct mlx5_flow_attr *attr,
 			    u16 vport_num,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aba17835465b..6b260dacf853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -435,6 +435,7 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 	esw->fdb_table.legacy.addr_grp = NULL;
 	esw->fdb_table.legacy.allmulti_grp = NULL;
 	esw->fdb_table.legacy.promisc_grp = NULL;
+	atomic64_set(&esw->user_count, 0);
 }
 
 static int esw_create_legacy_table(struct mlx5_eswitch *esw)
@@ -442,6 +443,7 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw)
 	int err;
 
 	memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+	atomic64_set(&esw->user_count, 0);
 
 	err = esw_create_legacy_vepa_table(esw);
 	if (err)
@@ -1141,6 +1143,8 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
 
 	if (!vport->qos.enabled)
 		return -EOPNOTSUPP;
@@ -1279,6 +1283,8 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
 	int ret;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
 
 	mutex_lock(&esw->state_lock);
 	WARN_ON(vport->enabled);
@@ -1326,6 +1332,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return;
 
 	mutex_lock(&esw->state_lock);
 	if (!vport->enabled)
@@ -1714,7 +1722,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 	if (!ESW_ALLOWED(esw))
 		return 0;
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	if (esw->mode == MLX5_ESWITCH_NONE) {
 		ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
 	} else {
@@ -1726,7 +1734,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 		if (!ret)
 			esw->esw_funcs.num_vfs = num_vfs;
 	}
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return ret;
 }
 
@@ -1774,10 +1782,10 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 	if (!ESW_ALLOWED(esw))
 		return;
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	mlx5_eswitch_disable_locked(esw, clear_vf);
 	esw->esw_funcs.num_vfs = 0;
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -1834,7 +1842,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	ida_init(&esw->offloads.vport_metadata_ida);
 	xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
 	mutex_init(&esw->state_lock);
-	mutex_init(&esw->mode_lock);
+	init_rwsem(&esw->mode_lock);
 
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
@@ -1870,7 +1878,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
-	mutex_destroy(&esw->mode_lock);
 	mutex_destroy(&esw->state_lock);
 	WARN_ON(!xa_empty(&esw->offloads.vhca_map));
 	xa_destroy(&esw->offloads.vhca_map);
@@ -2034,6 +2041,10 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 		vport = 0;
 	}
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
 	err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state);
 	if (err) {
@@ -2105,7 +2116,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				u16 vport, u16 vlan, u8 qos)
 {
 	u8 set_flags = 0;
-	int err;
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -2114,9 +2125,18 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		if (!vlan)
+			goto unlock; /* compatibility with libvirt */
+
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
 	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
-	mutex_unlock(&esw->state_lock);
 
+unlock:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
@@ -2133,6 +2153,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 	pschk = evport->info.spoofchk;
 	evport->info.spoofchk = spoofchk;
 	if (pschk && !is_valid_ether_addr(evport->info.mac))
@@ -2143,8 +2167,9 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 		err = esw_acl_ingress_lgcy_setup(esw, evport);
 	if (err)
 		evport->info.spoofchk = pschk;
-	mutex_unlock(&esw->state_lock);
 
+unlock:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
@@ -2265,6 +2290,7 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 				 u16 vport, bool setting)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -2272,12 +2298,17 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 	evport->info.trusted = setting;
 	if (evport->enabled)
 		esw_vport_change_handle_locked(evport);
-	mutex_unlock(&esw->state_lock);
 
-	return 0;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
 }
 
 static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
@@ -2552,3 +2583,94 @@ void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifie
 {
 	blocking_notifier_chain_unregister(&esw->n_head, nb);
 }
+
+/**
+ * mlx5_esw_hold() - Try to take a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ *
+ * Should be called by esw resources callers.
+ *
+ * Return: true on success or false.
+ */
+bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	/* e.g. VF doesn't have eswitch so nothing to do */
+	if (!ESW_ALLOWED(esw))
+		return true;
+
+	if (down_read_trylock(&esw->mode_lock) != 0)
+		return true;
+
+	return false;
+}
+
+/**
+ * mlx5_esw_release() - Release a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_release(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		up_read(&esw->mode_lock);
+}
+
+/**
+ * mlx5_esw_get() - Increase esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_get(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		atomic64_inc(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_put() - Decrease esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_put(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		atomic64_dec_if_positive(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_try_lock() - Take a write lock on esw mode lock.
+ * @esw: eswitch device.
+ *
+ * Should be called by esw mode change routine.
+ *
+ * Return:
+ * * 0       - esw mode if successfully locked and refcount is 0.
+ * * -EBUSY  - refcount is not 0.
+ * * -EINVAL - In the middle of switching mode or lock is already held.
+ */
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+{
+	if (down_write_trylock(&esw->mode_lock) == 0)
+		return -EINVAL;
+
+	if (atomic64_read(&esw->user_count) > 0) {
+		up_write(&esw->mode_lock);
+		return -EBUSY;
+	}
+
+	return esw->mode;
+}
+
+/**
+ * mlx5_esw_unlock() - Release write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_unlock(struct mlx5_eswitch *esw)
+{
+	up_write(&esw->mode_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index fdf5c8c05c1b..56d85cedb9bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -271,7 +271,8 @@ struct mlx5_eswitch {
 	/* Protects eswitch mode change that occurs via one or more
 	 * user commands, i.e. sriov state change, devlink commands.
 	 */
-	struct mutex mode_lock;
+	struct rw_semaphore mode_lock;
+	atomic64_t user_count;
 
 	struct {
 		bool            enabled;
@@ -761,6 +762,14 @@ struct mlx5_esw_event_info {
 
 int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n);
 void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n);
+
+bool mlx5_esw_hold(struct mlx5_core_dev *dev);
+void mlx5_esw_release(struct mlx5_core_dev *dev);
+void mlx5_esw_get(struct mlx5_core_dev *dev);
+void mlx5_esw_put(struct mlx5_core_dev *dev);
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
+void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -781,6 +790,13 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+				     u16 vport_num)
+{
+	return vport_num;
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8694b83968b4..d5de6bf622ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -40,7 +40,6 @@
 #include "eswitch.h"
 #include "esw/indir_table.h"
 #include "esw/acl/ofld.h"
-#include "esw/indir_table.h"
 #include "rdma.h"
 #include "en.h"
 #include "fs_core.h"
@@ -1447,7 +1446,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return ERR_PTR(-ENOMEM);
 
@@ -1470,7 +1469,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	dest.ft = esw->offloads.ft_offloads;
 
 	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
-	kfree(spec);
+	kvfree(spec);
 
 	if (IS_ERR(flow_rule))
 		esw_warn(esw->dev,
@@ -1855,6 +1854,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 	/* Holds true only as long as DMFS is the default */
 	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
 				     MLX5_FLOW_STEERING_MODE_DMFS);
+	atomic64_set(&esw->user_count, 0);
 }
 
 static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@ -2260,9 +2260,11 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return 0;
 
-	err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
-	if (err)
-		return err;
+	if (vport_num != MLX5_VPORT_UPLINK) {
+		err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
+		if (err)
+			return err;
+	}
 
 	err = mlx5_esw_offloads_rep_load(esw, vport_num);
 	if (err)
@@ -2270,7 +2272,8 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
 	return err;
 
 load_err:
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+	if (vport_num != MLX5_VPORT_UPLINK)
+		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 	return err;
 }
 
@@ -2280,7 +2283,9 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
 		return;
 
 	mlx5_esw_offloads_rep_unload(esw, vport_num);
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+
+	if (vport_num != MLX5_VPORT_UPLINK)
+		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 }
 
 #define ESW_OFFLOADS_DEVCOM_PAIR	(0)
@@ -2555,6 +2560,9 @@ static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
 	return esw_vport_create_offloads_acl_tables(esw, vport);
 }
 
@@ -2563,6 +2571,9 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+	if (IS_ERR(vport))
+		return;
+
 	esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
@@ -2574,6 +2585,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
 	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
 	mutex_init(&esw->fdb_table.offloads.vports.lock);
 	hash_init(esw->fdb_table.offloads.vports.table);
+	atomic64_set(&esw->user_count, 0);
 
 	indir = mlx5_esw_indir_table_init();
 	if (IS_ERR(indir)) {
@@ -2915,8 +2927,14 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
-	mutex_lock(&esw->mode_lock);
-	cur_mlx5_mode = esw->mode;
+	err = mlx5_esw_try_lock(esw);
+	if (err < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
+		return err;
+	}
+	cur_mlx5_mode = err;
+	err = 0;
+
 	if (cur_mlx5_mode == mlx5_mode)
 		goto unlock;
 
@@ -2928,7 +2946,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 		err = -EINVAL;
 
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	mlx5_esw_unlock(esw);
 	return err;
 }
 
@@ -2941,14 +2959,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	err = esw_mode_to_devlink(esw->mode, mode);
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -2964,7 +2982,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto out;
@@ -3003,7 +3021,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	}
 
 	esw->offloads.inline_mode = mlx5_mode;
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return 0;
 
 revert_inline_mode:
@@ -3013,7 +3031,7 @@ revert_inline_mode:
 						 vport,
 						 esw->offloads.inline_mode);
 out:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3026,14 +3044,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3049,7 +3067,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
@@ -3095,7 +3113,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 	}
 
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3110,14 +3128,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
 		return PTR_ERR(esw);
 
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	*encap = esw->offloads.encap;
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 66ad599bd488..f5517ea2f6be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2395,14 +2395,12 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
 			  struct init_tree_node *init_node,
 			  struct fs_node *fs_parent_node)
 {
-	int i;
-	struct mlx5_flow_namespace *fs_ns;
 	int err;
+	int i;
 
-	fs_get_obj(fs_ns, fs_parent_node);
 	for (i = 0; i < init_node->ar_size; i++) {
 		err = init_root_tree_recursive(steering, &init_node->children[i],
-					       &fs_ns->node,
+					       fs_parent_node,
 					       init_node, i);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index f9042e147c7f..d5d57630015f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -104,7 +104,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
 	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
 		complete(&fw_reset->done);
 	} else {
-		mlx5_load_one(dev, false);
+		mlx5_load_one(dev);
 		devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
 							BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
 							BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
@@ -119,7 +119,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
 	int err;
 
 	mlx5_enter_error_state(dev, true);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 	err = mlx5_health_wait_pci_up(dev);
 	if (err)
 		mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
@@ -199,16 +199,11 @@ static void mlx5_fw_live_patch_event(struct work_struct *work)
 	struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
 						      fw_live_patch_work);
 	struct mlx5_core_dev *dev = fw_reset->dev;
-	struct mlx5_fw_tracer *tracer;
 
 	mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev),
 		       fw_rev_min(dev), fw_rev_sub(dev));
 
-	tracer = dev->tracer;
-	if (IS_ERR_OR_NULL(tracer))
-		return;
-
-	if (mlx5_fw_tracer_reload(tracer))
+	if (mlx5_fw_tracer_reload(dev->tracer))
 		mlx5_core_err(dev, "Failed to reload FW tracer\n");
 }
 
@@ -342,7 +337,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
 	}
 
 	mlx5_enter_error_state(dev, true);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 done:
 	fw_reset->ret = err;
 	mlx5_fw_reset_complete_reload(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 0c32c485eb58..a0a851640804 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -335,12 +335,12 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
 		return -EIO;
 	}
 	mlx5_core_err(dev, "starting health recovery flow\n");
-	mlx5_recover_device(dev);
-	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
-	    mlx5_health_check_fatal_sensors(dev)) {
+	if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
 		mlx5_core_err(dev, "health recovery failed\n");
 		return -EIO;
 	}
+
+	mlx5_core_info(dev, "health revovery succeded\n");
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 6f7cef47e04c..48303286c133 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -710,7 +710,7 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
 
 static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
 {
-	return mdev->mlx5e_res.pdn != 0;
+	return mdev->mlx5e_res.hw_objs.pdn != 0;
 }
 
 static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 83a05371e2aa..127bb92da150 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -768,7 +768,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 
 	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && __mlx5_lag_is_roce(ldev)) {
+	if (ldev && __mlx5_lag_is_active(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
 		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
index 57eb91bcbca7..e995f8378df7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -46,7 +46,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
 		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
 		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
-	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
 
 	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index a68738c8f4bc..97324d9d4f2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -55,10 +55,6 @@ void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
 
 int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 {
-	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
-		mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
-		return -EPERM;
-	}
 	if (dev->roce.reserved_gids.start < count) {
 		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
 			       count);
@@ -79,7 +75,6 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 
 void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 {
-	WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
 	WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
 	     count, dev->roce.reserved_gids.count);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index d046db7bb047..2f536c5d30b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -95,4 +95,13 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
 	return devlink_net(priv_to_devlink(dev));
 }
 
+static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+	mdev->mlx5e_res.uplink_netdev = netdev;
+}
+
+static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
+{
+	return mdev->mlx5e_res.uplink_netdev;
+}
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c568896cfb23..e3a417d17707 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1235,7 +1235,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_put_uars_page(dev, dev->priv.uar);
 }
 
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+int mlx5_init_one(struct mlx5_core_dev *dev)
 {
 	int err = 0;
 
@@ -1247,16 +1247,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 	/* remove any previous indication of internal error */
 	dev->state = MLX5_DEVICE_STATE_UP;
 
-	err = mlx5_function_setup(dev, boot);
+	err = mlx5_function_setup(dev, true);
 	if (err)
 		goto err_function;
 
-	if (boot) {
-		err = mlx5_init_once(dev);
-		if (err) {
-			mlx5_core_err(dev, "sw objs init failed\n");
-			goto function_teardown;
-		}
+	err = mlx5_init_once(dev);
+	if (err) {
+		mlx5_core_err(dev, "sw objs init failed\n");
+		goto function_teardown;
 	}
 
 	err = mlx5_load(dev);
@@ -1265,16 +1263,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-	if (boot) {
-		err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
-		if (err)
-			goto err_devlink_reg;
-
-		err = mlx5_register_device(dev);
-	} else {
-		err = mlx5_attach_device(dev);
-	}
+	err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+	if (err)
+		goto err_devlink_reg;
 
+	err = mlx5_register_device(dev);
 	if (err)
 		goto err_register;
 
@@ -1282,16 +1275,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 	return 0;
 
 err_register:
-	if (boot)
-		mlx5_devlink_unregister(priv_to_devlink(dev));
+	mlx5_devlink_unregister(priv_to_devlink(dev));
 err_devlink_reg:
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
 err_load:
-	if (boot)
-		mlx5_cleanup_once(dev);
+	mlx5_cleanup_once(dev);
 function_teardown:
-	mlx5_function_teardown(dev, boot);
+	mlx5_function_teardown(dev, true);
 err_function:
 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 out:
@@ -1299,33 +1290,84 @@ out:
 	return err;
 }
 
-void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
+void mlx5_uninit_one(struct mlx5_core_dev *dev)
 {
 	mutex_lock(&dev->intf_state_mutex);
 
-	if (cleanup) {
-		mlx5_unregister_device(dev);
-		mlx5_devlink_unregister(priv_to_devlink(dev));
-	} else {
-		mlx5_detach_device(dev);
-	}
+	mlx5_unregister_device(dev);
+	mlx5_devlink_unregister(priv_to_devlink(dev));
 
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
 			       __func__);
-		if (cleanup)
-			mlx5_cleanup_once(dev);
+		mlx5_cleanup_once(dev);
 		goto out;
 	}
 
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+	mlx5_unload(dev);
+	mlx5_cleanup_once(dev);
+	mlx5_function_teardown(dev, true);
+out:
+	mutex_unlock(&dev->intf_state_mutex);
+}
+
+int mlx5_load_one(struct mlx5_core_dev *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->intf_state_mutex);
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_warn(dev, "interface is up, NOP\n");
+		goto out;
+	}
+	/* remove any previous indication of internal error */
+	dev->state = MLX5_DEVICE_STATE_UP;
+
+	err = mlx5_function_setup(dev, false);
+	if (err)
+		goto err_function;
+
+	err = mlx5_load(dev);
+	if (err)
+		goto err_load;
+
+	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+	err = mlx5_attach_device(dev);
+	if (err)
+		goto err_attach;
+
+	mutex_unlock(&dev->intf_state_mutex);
+	return 0;
 
+err_attach:
+	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
+err_load:
+	mlx5_function_teardown(dev, false);
+err_function:
+	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+out:
+	mutex_unlock(&dev->intf_state_mutex);
+	return err;
+}
 
-	if (cleanup)
-		mlx5_cleanup_once(dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev)
+{
+	mutex_lock(&dev->intf_state_mutex);
 
-	mlx5_function_teardown(dev, cleanup);
+	mlx5_detach_device(dev);
+
+	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+			       __func__);
+		goto out;
+	}
+
+	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+	mlx5_unload(dev);
+	mlx5_function_teardown(dev, false);
 out:
 	mutex_unlock(&dev->intf_state_mutex);
 }
@@ -1397,7 +1439,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 	mutex_destroy(&dev->intf_state_mutex);
 }
 
-static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx5_core_dev *dev;
 	struct devlink *devlink;
@@ -1433,11 +1475,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto pci_init_err;
 	}
 
-	err = mlx5_load_one(dev, true);
+	err = mlx5_init_one(dev);
 	if (err) {
-		mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
 			      err);
-		goto err_load_one;
+		goto err_init_one;
 	}
 
 	err = mlx5_crdump_enable(dev);
@@ -1449,7 +1491,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		devlink_reload_enable(devlink);
 	return 0;
 
-err_load_one:
+err_init_one:
 	mlx5_pci_close(dev);
 pci_init_err:
 	mlx5_mdev_uninit(dev);
@@ -1469,7 +1511,7 @@ static void remove_one(struct pci_dev *pdev)
 	devlink_reload_disable(devlink);
 	mlx5_crdump_disable(dev);
 	mlx5_drain_health_wq(dev);
-	mlx5_unload_one(dev, true);
+	mlx5_uninit_one(dev);
 	mlx5_pci_close(dev);
 	mlx5_mdev_uninit(dev);
 	mlx5_adev_idx_free(dev->priv.adev_idx);
@@ -1485,7 +1527,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	mlx5_enter_error_state(dev, false);
 	mlx5_error_sw_reset(dev);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 	mlx5_drain_health_wq(dev);
 	mlx5_pci_disable_device(dev);
 
@@ -1555,7 +1597,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
 
 	mlx5_core_info(dev, "%s was called\n", __func__);
 
-	err = mlx5_load_one(dev, false);
+	err = mlx5_load_one(dev);
 	if (err)
 		mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
 			      __func__, err);
@@ -1627,7 +1669,7 @@ static void shutdown(struct pci_dev *pdev)
 	mlx5_core_info(dev, "Shutdown was called\n");
 	err = mlx5_try_fast_unload(dev);
 	if (err)
-		mlx5_unload_one(dev, false);
+		mlx5_unload_one(dev);
 	mlx5_pci_disable_device(dev);
 }
 
@@ -1635,7 +1677,7 @@ static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 
 	return 0;
 }
@@ -1644,7 +1686,7 @@ static int mlx5_resume(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 
-	return mlx5_load_one(dev, false);
+	return mlx5_load_one(dev);
 }
 
 static const struct pci_device_id mlx5_core_pci_table[] = {
@@ -1676,20 +1718,23 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
 void mlx5_disable_device(struct mlx5_core_dev *dev)
 {
 	mlx5_error_sw_reset(dev);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 }
 
-void mlx5_recover_device(struct mlx5_core_dev *dev)
+int mlx5_recover_device(struct mlx5_core_dev *dev)
 {
+	int ret = -EIO;
+
 	mlx5_pci_disable_device(dev);
 	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
-		mlx5_pci_resume(dev->pdev);
+		ret = mlx5_load_one(dev);
+	return ret;
 }
 
 static struct pci_driver mlx5_core_driver = {
 	.name           = KBUILD_MODNAME,
 	.id_table       = mlx5_core_pci_table,
-	.probe          = init_one,
+	.probe          = probe_one,
 	.remove         = remove_one,
 	.suspend        = mlx5_suspend,
 	.resume         = mlx5_resume,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index efe403c7e354..37c8ec7d2217 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -134,7 +134,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev);
 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
-void mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_recover_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
@@ -267,8 +267,10 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
 
 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
 void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
-void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup);
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
+int mlx5_init_one(struct mlx5_core_dev *dev);
+void mlx5_uninit_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev);
 
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
index 4de02902aef1..149fd9e698cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -47,7 +47,7 @@ static inline void mlx5_sf_driver_unregister(void)
 
 static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
 {
-	return 0;
+	return false;
 }
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index c4bf555c25ea..42c8ee03fe3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -41,14 +41,15 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
 		goto remap_err;
 	}
 
-	err = mlx5_load_one(mdev, true);
+	err = mlx5_init_one(mdev);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_load_one err=%d\n", err);
-		goto load_one_err;
+		mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
+		goto init_one_err;
 	}
+	devlink_reload_enable(devlink);
 	return 0;
 
-load_one_err:
+init_one_err:
 	iounmap(mdev->iseg);
 remap_err:
 	mlx5_mdev_uninit(mdev);
@@ -63,7 +64,8 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
 	struct devlink *devlink;
 
 	devlink = priv_to_devlink(sf_dev->mdev);
-	mlx5_unload_one(sf_dev->mdev, true);
+	devlink_reload_disable(devlink);
+	mlx5_uninit_one(sf_dev->mdev);
 	iounmap(sf_dev->mdev->iseg);
 	mlx5_mdev_uninit(sf_dev->mdev);
 	mlx5_devlink_free(devlink);
@@ -73,7 +75,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
 {
 	struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
 
-	mlx5_unload_one(sf_dev->mdev, false);
+	mlx5_unload_one(sf_dev->mdev);
 }
 
 static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index c2ba41bb7a70..60a6328a9ca0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -492,7 +492,7 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
 		break;
 	default:
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index a5a0f60bef66..c9bddde04047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -5,6 +5,7 @@
 #include "priv.h"
 #include "sf.h"
 #include "mlx5_ifc_vhca_event.h"
+#include "ecpf.h"
 #include "vhca_event.h"
 #include "mlx5_core.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 9ec079247c4b..c5f62d2a058f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -331,7 +331,7 @@ static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
 	MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
 		 DR_STE_ACTION_TYPE_PUSH_VLAN);
 	MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
-	/* Due to HW limitation we need to set this bit, otherwise reforamt +
+	/* Due to HW limitation we need to set this bit, otherwise reformat +
 	 * push vlan will not work.
 	 */
 	if (go_back)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 9143ec326ebf..616ebc38381a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -437,21 +437,6 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
 	dr_ste_v1_set_reparse(hw_ste_p);
 }
 
-static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p,
-				      u8 *s_action,
-				      u16 decap_actions,
-				      u32 decap_index)
-{
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
-		 DR_STE_V1_ACTION_ID_MODIFY_LIST);
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
-		 decap_actions);
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
-		 decap_index);
-
-	dr_ste_v1_set_reparse(hw_ste_p);
-}
-
 static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
 					  u8 *s_action,
 					  u16 num_of_actions,
@@ -571,9 +556,6 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 	bool allow_ctr = true;
 
 	if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
-		dr_ste_v1_set_rx_decap_l3(last_ste, action,
-					  attr->decap_actions,
-					  attr->decap_index);
 		dr_ste_v1_set_rewrite_actions(last_ste, action,
 					      attr->decap_actions,
 					      attr->decap_index);
@@ -1532,6 +1514,7 @@ static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *val
 
 	DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port);
 	DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn);
+	misc_mask->source_eswitch_owner_vhca_id = 0;
 }
 
 static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 52fdc34251ba..7e9a7cb31720 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1728,7 +1728,7 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
 		return err;
 
 	event_id = mlxsw_reg_mfde_event_id_get(mfde_pl);
-	err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id);
+	err = devlink_fmsg_u32_pair_put(fmsg, "id", event_id);
 	if (err)
 		return err;
 	switch (event_id) {
@@ -1806,6 +1806,10 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
 		err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val);
 		if (err)
 			return err;
+		val = mlxsw_reg_mfde_log_ip_get(mfde_pl);
+		err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val);
+		if (err)
+			return err;
 	} else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) {
 		val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl);
 		err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 8af7d9d03475..80712dc803d0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -58,6 +58,25 @@ struct mlxsw_tx_info {
 	bool is_emad;
 };
 
+struct mlxsw_rx_md_info {
+	u32 cookie_index;
+	u32 latency;
+	u32 tx_congestion;
+	union {
+		/* Valid when 'tx_port_valid' is set. */
+		u16 tx_sys_port;
+		u16 tx_lag_id;
+	};
+	u8 tx_lag_port_index; /* Valid when 'tx_port_is_lag' is set. */
+	u8 tx_tc;
+	u8 latency_valid:1,
+	   tx_congestion_valid:1,
+	   tx_tc_valid:1,
+	   tx_port_valid:1,
+	   tx_port_is_lag:1,
+	   unused:3;
+};
+
 bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
 				  const struct mlxsw_tx_info *tx_info);
 int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
@@ -515,7 +534,7 @@ enum mlxsw_devlink_param_id {
 struct mlxsw_skb_cb {
 	union {
 		struct mlxsw_tx_info tx_info;
-		u32 cookie_index; /* Only used during receive */
+		struct mlxsw_rx_md_info rx_md_info;
 	};
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 4d699fe98cb6..78d9c0196f2b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -2007,3 +2007,134 @@ int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport,
 	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_l4port);
+
+/* Mirror Sampler Action
+ * ---------------------
+ * The SAMPLER_ACTION is used to mirror packets with a probability (sampling).
+ */
+
+#define MLXSW_AFA_SAMPLER_CODE 0x13
+#define MLXSW_AFA_SAMPLER_SIZE 1
+
+/* afa_sampler_mirror_agent
+ * Mirror (SPAN) agent.
+ */
+MLXSW_ITEM32(afa, sampler, mirror_agent, 0x04, 0, 3);
+
+#define MLXSW_AFA_SAMPLER_RATE_MAX (BIT(24) - 1)
+
+/* afa_sampler_mirror_probability_rate
+ * Mirroring probability.
+ * Valid values are 1 to 2^24 - 1
+ */
+MLXSW_ITEM32(afa, sampler, mirror_probability_rate, 0x08, 0, 24);
+
+static void mlxsw_afa_sampler_pack(char *payload, u8 mirror_agent, u32 rate)
+{
+	mlxsw_afa_sampler_mirror_agent_set(payload, mirror_agent);
+	mlxsw_afa_sampler_mirror_probability_rate_set(payload, rate);
+}
+
+struct mlxsw_afa_sampler {
+	struct mlxsw_afa_resource resource;
+	int span_id;
+	u8 local_port;
+	bool ingress;
+};
+
+static void mlxsw_afa_sampler_destroy(struct mlxsw_afa_block *block,
+				      struct mlxsw_afa_sampler *sampler)
+{
+	mlxsw_afa_resource_del(&sampler->resource);
+	block->afa->ops->sampler_del(block->afa->ops_priv, sampler->local_port,
+				     sampler->span_id, sampler->ingress);
+	kfree(sampler);
+}
+
+static void mlxsw_afa_sampler_destructor(struct mlxsw_afa_block *block,
+					 struct mlxsw_afa_resource *resource)
+{
+	struct mlxsw_afa_sampler *sampler;
+
+	sampler = container_of(resource, struct mlxsw_afa_sampler, resource);
+	mlxsw_afa_sampler_destroy(block, sampler);
+}
+
+static struct mlxsw_afa_sampler *
+mlxsw_afa_sampler_create(struct mlxsw_afa_block *block, u8 local_port,
+			 struct psample_group *psample_group, u32 rate,
+			 u32 trunc_size, bool truncate, bool ingress,
+			 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_afa_sampler *sampler;
+	int err;
+
+	sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+	if (!sampler)
+		return ERR_PTR(-ENOMEM);
+
+	err = block->afa->ops->sampler_add(block->afa->ops_priv, local_port,
+					   psample_group, rate, trunc_size,
+					   truncate, ingress, &sampler->span_id,
+					   extack);
+	if (err)
+		goto err_sampler_add;
+
+	sampler->ingress = ingress;
+	sampler->local_port = local_port;
+	sampler->resource.destructor = mlxsw_afa_sampler_destructor;
+	mlxsw_afa_resource_add(block, &sampler->resource);
+	return sampler;
+
+err_sampler_add:
+	kfree(sampler);
+	return ERR_PTR(err);
+}
+
+static int
+mlxsw_afa_block_append_allocated_sampler(struct mlxsw_afa_block *block,
+					 u8 mirror_agent, u32 rate)
+{
+	char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_SAMPLER_CODE,
+						  MLXSW_AFA_SAMPLER_SIZE);
+
+	if (IS_ERR(act))
+		return PTR_ERR(act);
+	mlxsw_afa_sampler_pack(act, mirror_agent, rate);
+	return 0;
+}
+
+int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port,
+				   struct psample_group *psample_group,
+				   u32 rate, u32 trunc_size, bool truncate,
+				   bool ingress,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_afa_sampler *sampler;
+	int err;
+
+	if (rate > MLXSW_AFA_SAMPLER_RATE_MAX) {
+		NL_SET_ERR_MSG_MOD(extack, "Sampling rate is too high");
+		return -EINVAL;
+	}
+
+	sampler = mlxsw_afa_sampler_create(block, local_port, psample_group,
+					   rate, trunc_size, truncate, ingress,
+					   extack);
+	if (IS_ERR(sampler))
+		return PTR_ERR(sampler);
+
+	err = mlxsw_afa_block_append_allocated_sampler(block, sampler->span_id,
+						       rate);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot append sampler action");
+		goto err_append_allocated_sampler;
+	}
+
+	return 0;
+
+err_append_allocated_sampler:
+	mlxsw_afa_sampler_destroy(block, sampler);
+	return err;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_sampler);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index b652497b1002..b65bf98eb5ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -30,6 +30,12 @@ struct mlxsw_afa_ops {
 			   u16 *p_policer_index,
 			   struct netlink_ext_ack *extack);
 	void (*policer_del)(void *priv, u16 policer_index);
+	int (*sampler_add)(void *priv, u8 local_port,
+			   struct psample_group *psample_group, u32 rate,
+			   u32 trunc_size, bool truncate, bool ingress,
+			   int *p_span_id, struct netlink_ext_ack *extack);
+	void (*sampler_del)(void *priv, u8 local_port, int span_id,
+			    bool ingress);
 	bool dummy_first_set;
 };
 
@@ -92,5 +98,10 @@ int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block,
 				  u32 fa_index, u64 rate_bytes_ps, u32 burst,
 				  u16 *p_policer_index,
 				  struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port,
+				   struct psample_group *psample_group,
+				   u32 rate, u32 trunc_size, bool truncate,
+				   bool ingress,
+				   struct netlink_ext_ack *extack);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index d0052537e627..8e8456811384 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -540,6 +540,55 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 	spin_unlock(&q->lock);
 }
 
+static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb,
+					      const char *cqe)
+{
+	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
+
+	if (mlxsw_pci_cqe2_tx_lag_get(cqe)) {
+		cb->rx_md_info.tx_port_is_lag = true;
+		cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe);
+		cb->rx_md_info.tx_lag_port_index =
+			mlxsw_pci_cqe2_tx_lag_subport_get(cqe);
+	} else {
+		cb->rx_md_info.tx_port_is_lag = false;
+		cb->rx_md_info.tx_sys_port =
+			mlxsw_pci_cqe2_tx_system_port_get(cqe);
+	}
+
+	if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT &&
+	    cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID)
+		cb->rx_md_info.tx_port_valid = 1;
+	else
+		cb->rx_md_info.tx_port_valid = 0;
+}
+
+static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe)
+{
+	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
+
+	cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe);
+	if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID)
+		cb->rx_md_info.tx_congestion_valid = 1;
+	else
+		cb->rx_md_info.tx_congestion_valid = 0;
+	cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT;
+
+	cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe);
+	if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID)
+		cb->rx_md_info.latency_valid = 1;
+	else
+		cb->rx_md_info.latency_valid = 0;
+
+	cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe);
+	if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID)
+		cb->rx_md_info.tx_tc_valid = 1;
+	else
+		cb->rx_md_info.tx_tc_valid = 0;
+
+	mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
+}
+
 static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 				     struct mlxsw_pci_queue *q,
 				     u16 consumer_counter_limit,
@@ -581,11 +630,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 
 		if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2)
 			cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe);
-		mlxsw_skb_cb(skb)->cookie_index = cookie_index;
+		mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index;
 	} else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 &&
 		   rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 &&
 		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
 		rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe);
+		mlxsw_pci_cqe_rdq_md_init(skb, cqe);
+	} else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE &&
+		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
+		mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
 	}
 
 	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index a2c1fbd3e0d1..7b531228d6c0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -173,6 +173,15 @@ MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
  */
 MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
 
+#define MLXSW_PCI_CQE2_MIRROR_CONG_INVALID	0xFFFF
+
+/* pci_cqe_mirror_cong_high
+ * Congestion level in units of 8KB of the egress traffic class of the original
+ * packet that does mirroring to the CPU. Value of 0xFFFF means that the
+ * congestion level is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_cong_high, 0x08, 16, 4);
+
 /* pci_cqe_trap_id
  * Trap ID that captured the packet.
  */
@@ -208,6 +217,59 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5);
 MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6);
 mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12);
 
+#define MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID	0x1F
+
+/* pci_cqe_mirror_tclass
+ * The egress traffic class of the original packet that does mirroring to the
+ * CPU. Value of 0x1F means that the traffic class is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_tclass, 0x10, 27, 5);
+
+/* pci_cqe_tx_lag
+ * The Tx port of a packet that is mirrored / sampled to the CPU is a LAG.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag, 0x10, 24, 1);
+
+/* pci_cqe_tx_lag_subport
+ * The port index within the LAG of a packet that is mirrored / sampled to the
+ * CPU. Reserved when tx_lag is 0.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag_subport, 0x10, 16, 8);
+
+#define MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT	0xFFFE
+#define MLXSW_PCI_CQE2_TX_PORT_INVALID		0xFFFF
+
+/* pci_cqe_tx_lag_id
+ * The Tx LAG ID of the original packet that is mirrored / sampled to the CPU.
+ * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx LAG ID
+ * is invalid. Reserved when tx_lag is 0.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag_id, 0x10, 0, 16);
+
+/* pci_cqe_tx_system_port
+ * The Tx port of the original packet that is mirrored / sampled to the CPU.
+ * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx port is
+ * invalid. Reserved when tx_lag is 1.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_system_port, 0x10, 0, 16);
+
+/* pci_cqe_mirror_cong_low
+ * Congestion level in units of 8KB of the egress traffic class of the original
+ * packet that does mirroring to the CPU. Value of 0xFFFF means that the
+ * congestion level is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_cong_low, 0x14, 20, 12);
+
+#define MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT	13	/* Units of 8KB. */
+
+static inline u16 mlxsw_pci_cqe2_mirror_cong_get(const char *cqe)
+{
+	u16 cong_high = mlxsw_pci_cqe2_mirror_cong_high_get(cqe);
+	u16 cong_low = mlxsw_pci_cqe2_mirror_cong_low_get(cqe);
+
+	return cong_high << 12 | cong_low;
+}
+
 /* pci_cqe_user_def_val_orig_pkt_len
  * When trap_id is an ACL: User defined value from policy engine action.
  */
@@ -218,6 +280,15 @@ MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20);
  */
 MLXSW_ITEM32(pci, cqe2, mirror_reason, 0x18, 24, 8);
 
+#define MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID	0xFFFFFF
+
+/* pci_cqe_mirror_latency
+ * End-to-end latency of the original packet that does mirroring to the CPU.
+ * Value of 0xFFFFFF means that the latency is invalid. Units are according to
+ * MOGCR.mirror_latency_units.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_latency, 0x1C, 8, 24);
+
 /* pci_cqe_owner
  * Ownership bit.
  */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index c4adc7f740d3..900b4bf5bb5b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -842,6 +842,14 @@ MLXSW_ITEM32(reg, spvid, local_port, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8);
 
+/* reg_spvid_egr_et_set
+ * When VLAN is pushed at ingress (for untagged packets or for
+ * QinQ push mode) then the EtherType is decided at the egress port.
+ * Reserved when Spectrum-1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spvid, egr_et_set, 0x04, 24, 1);
+
 /* reg_spvid_et_vlan
  * EtherType used for when VLAN is pushed at ingress (for untagged
  * packets or for QinQ push mode).
@@ -849,6 +857,7 @@ MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8);
  * 1: ether_type1
  * 2: ether_type2 - Reserved when Spectrum-1, supported by Spectrum-2
  * Ethertype IDs are configured by SVER.
+ * Reserved when egr_et_set = 1.
  * Access: RW
  */
 MLXSW_ITEM32(reg, spvid, et_vlan, 0x04, 16, 2);
@@ -2079,6 +2088,41 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u8 local_port, bool et1,
 	mlxsw_reg_spvc_et0_set(payload, et0);
 }
 
+/* SPEVET - Switch Port Egress VLAN EtherType
+ * ------------------------------------------
+ * The switch port egress VLAN EtherType configures which EtherType to push at
+ * egress for packets incoming through a local port for which 'SPVID.egr_et_set'
+ * is set.
+ */
+#define MLXSW_REG_SPEVET_ID 0x202A
+#define MLXSW_REG_SPEVET_LEN 0x08
+
+MLXSW_REG_DEFINE(spevet, MLXSW_REG_SPEVET_ID, MLXSW_REG_SPEVET_LEN);
+
+/* reg_spevet_local_port
+ * Egress Local port number.
+ * Not supported to CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spevet, local_port, 0x00, 16, 8);
+
+/* reg_spevet_et_vlan
+ * Egress EtherType VLAN to push when SPVID.egr_et_set field set for the packet:
+ * 0: ether_type0 - (default)
+ * 1: ether_type1
+ * 2: ether_type2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spevet, et_vlan, 0x04, 16, 2);
+
+static inline void mlxsw_reg_spevet_pack(char *payload, u8 local_port,
+					 u8 et_vlan)
+{
+	MLXSW_REG_ZERO(spevet, payload);
+	mlxsw_reg_spevet_local_port_set(payload, local_port);
+	mlxsw_reg_spevet_et_vlan_set(payload, et_vlan);
+}
+
 /* CWTP - Congetion WRED ECN TClass Profile
  * ----------------------------------------
  * Configures the profiles for queues of egress port and traffic class
@@ -5637,7 +5681,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port)
 
 MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN);
 
-/* reg_slot_index
+/* reg_pmaos_slot_index
  * Slot index.
  * Access: Index
  */
@@ -8086,6 +8130,60 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
 	mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
 }
 
+/* RATRAD - Router Adjacency Table Activity Dump Register
+ * ------------------------------------------------------
+ * The RATRAD register is used to dump and optionally clear activity bits of
+ * router adjacency table entries.
+ */
+#define MLXSW_REG_RATRAD_ID 0x8022
+#define MLXSW_REG_RATRAD_LEN 0x210
+
+MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN);
+
+enum {
+	/* Read activity */
+	MLXSW_REG_RATRAD_OP_READ_ACTIVITY,
+	/* Read and clear activity */
+	MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY,
+};
+
+/* reg_ratrad_op
+ * Access: Operation
+ */
+MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2);
+
+/* reg_ratrad_ecmp_size
+ * ecmp_size is the amount of sequential entries from adjacency_index. Valid
+ * ranges:
+ * Spectrum-1: 32-64, 512, 1024, 2048, 4096
+ * Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13);
+
+/* reg_ratrad_adjacency_index
+ * Index into the adjacency table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24);
+
+/* reg_ratrad_activity_vector
+ * Activity bit per adjacency index.
+ * Bits higher than ecmp_size are reserved.
+ * Access: RO
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1);
+
+static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index,
+					 u16 ecmp_size)
+{
+	MLXSW_REG_ZERO(ratrad, payload);
+	mlxsw_reg_ratrad_op_set(payload,
+				MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY);
+	mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size);
+	mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index);
+}
+
 /* RIGR-V2 - Router Interface Group Register Version 2
  * ---------------------------------------------------
  * The RIGR_V2 register is used to add, remove and query egress interface list
@@ -9925,15 +10023,28 @@ MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1);
  */
 MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4);
 
+#define MLXSW_REG_MPAR_RATE_MAX 3500000000UL
+
+/* reg_mpar_probability_rate
+ * Sampling rate.
+ * Valid values are: 1 to 3.5*10^9
+ * Value of 1 means "sample all". Default is 1.
+ * Reserved when Spectrum-1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpar, probability_rate, 0x08, 0, 32);
+
 static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
 				       enum mlxsw_reg_mpar_i_e i_e,
-				       bool enable, u8 pa_id)
+				       bool enable, u8 pa_id,
+				       u32 probability_rate)
 {
 	MLXSW_REG_ZERO(mpar, payload);
 	mlxsw_reg_mpar_local_port_set(payload, local_port);
 	mlxsw_reg_mpar_enable_set(payload, enable);
 	mlxsw_reg_mpar_i_e_set(payload, i_e);
 	mlxsw_reg_mpar_pa_id_set(payload, pa_id);
+	mlxsw_reg_mpar_probability_rate_set(payload, probability_rate);
 }
 
 /* MGIR - Management General Information Register
@@ -10577,6 +10688,8 @@ MLXSW_ITEM32(reg, mpagr, trigger, 0x00, 0, 4);
  */
 MLXSW_ITEM32(reg, mpagr, pa_id, 0x04, 0, 4);
 
+#define MLXSW_REG_MPAGR_RATE_MAX 3500000000UL
+
 /* reg_mpagr_probability_rate
  * Sampling rate.
  * Valid values are: 1 to 3.5*10^9
@@ -10919,7 +11032,7 @@ MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN);
  * Which irisc triggered the event
  * Access: RO
  */
-MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 8, 4);
+MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 24, 8);
 
 enum mlxsw_reg_mfde_event_id {
 	MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1,
@@ -10930,7 +11043,7 @@ enum mlxsw_reg_mfde_event_id {
 /* reg_mfde_event_id
  * Access: RO
  */
-MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 8);
+MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 16);
 
 enum mlxsw_reg_mfde_method {
 	MLXSW_REG_MFDE_METHOD_QUERY,
@@ -10979,6 +11092,13 @@ MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32);
  */
 MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4);
 
+/* reg_mfde_log_ip
+ * IP (instruction pointer) that triggered the timeout.
+ * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, mfde, log_ip, 0x18, 0, 64);
+
 /* reg_mfde_pipes_mask
  * Bit per kvh pipe.
  * Access: RO
@@ -11995,6 +12115,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(sfmr),
 	MLXSW_REG(spvmlr),
 	MLXSW_REG(spvc),
+	MLXSW_REG(spevet),
 	MLXSW_REG(cwtp),
 	MLXSW_REG(cwtpm),
 	MLXSW_REG(pgcr),
@@ -12047,6 +12168,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(rtar),
 	MLXSW_REG(ratr),
 	MLXSW_REG(rtdp),
+	MLXSW_REG(ratrad),
 	MLXSW_REG(rdpm),
 	MLXSW_REG(ricnt),
 	MLXSW_REG(rrcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 1650d9852b5b..efc7acb4842c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -23,6 +23,8 @@
 #include <linux/netlink.h>
 #include <linux/jhash.h>
 #include <linux/log2.h>
+#include <linux/refcount.h>
+#include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/netevent.h>
@@ -45,7 +47,7 @@
 
 #define MLXSW_SP1_FWREV_MAJOR 13
 #define MLXSW_SP1_FWREV_MINOR 2008
-#define MLXSW_SP1_FWREV_SUBMINOR 2018
+#define MLXSW_SP1_FWREV_SUBMINOR 2406
 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
 
 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -62,7 +64,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
 
 #define MLXSW_SP2_FWREV_MAJOR 29
 #define MLXSW_SP2_FWREV_MINOR 2008
-#define MLXSW_SP2_FWREV_SUBMINOR 2018
+#define MLXSW_SP2_FWREV_SUBMINOR 2406
 
 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
 	.major = MLXSW_SP2_FWREV_MAJOR,
@@ -77,7 +79,7 @@ static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
 
 #define MLXSW_SP3_FWREV_MAJOR 30
 #define MLXSW_SP3_FWREV_MINOR 2008
-#define MLXSW_SP3_FWREV_SUBMINOR 2018
+#define MLXSW_SP3_FWREV_SUBMINOR 2406
 
 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = {
 	.major = MLXSW_SP3_FWREV_MAJOR,
@@ -400,6 +402,22 @@ int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type)
 	return 0;
 }
 
+int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 ethtype)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char spevet_pl[MLXSW_REG_SPEVET_LEN];
+	u8 sver_type;
+	int err;
+
+	err = mlxsw_sp_ethtype_to_sver_type(ethtype, &sver_type);
+	if (err)
+		return err;
+
+	mlxsw_reg_spevet_pack(spevet_pl, mlxsw_sp_port->local_port, sver_type);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spevet), spevet_pl);
+}
+
 static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				    u16 vid, u16 ethtype)
 {
@@ -2212,32 +2230,6 @@ void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
 }
 
-void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
-			     u8 local_port)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
-	struct mlxsw_sp_port_sample *sample;
-	u32 size;
-
-	if (unlikely(!mlxsw_sp_port)) {
-		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n",
-				     local_port);
-		goto out;
-	}
-
-	rcu_read_lock();
-	sample = rcu_dereference(mlxsw_sp_port->sample);
-	if (!sample)
-		goto out_unlock;
-	size = sample->truncate ? sample->trunc_size : skb->len;
-	psample_sample_packet(sample->psample_group, skb, size,
-			      mlxsw_sp_port->dev->ifindex, 0, sample->rate);
-out_unlock:
-	rcu_read_unlock();
-out:
-	consume_skb(skb);
-}
-
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
 	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
 		  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -2576,6 +2568,142 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
 	.get_stats	= mlxsw_sp2_get_stats,
 };
 
+struct mlxsw_sp_sample_trigger_node {
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params params;
+	struct rhash_head ht_node;
+	struct rcu_head rcu;
+	refcount_t refcount;
+};
+
+static const struct rhashtable_params mlxsw_sp_sample_trigger_ht_params = {
+	.key_offset = offsetof(struct mlxsw_sp_sample_trigger_node, trigger),
+	.head_offset = offsetof(struct mlxsw_sp_sample_trigger_node, ht_node),
+	.key_len = sizeof(struct mlxsw_sp_sample_trigger),
+	.automatic_shrinking = true,
+};
+
+static void
+mlxsw_sp_sample_trigger_key_init(struct mlxsw_sp_sample_trigger *key,
+				 const struct mlxsw_sp_sample_trigger *trigger)
+{
+	memset(key, 0, sizeof(*key));
+	key->type = trigger->type;
+	key->local_port = trigger->local_port;
+}
+
+/* RCU read lock must be held */
+struct mlxsw_sp_sample_params *
+mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_sp_sample_trigger *trigger)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+	trigger_node = rhashtable_lookup(&mlxsw_sp->sample_trigger_ht, &key,
+					 mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return NULL;
+
+	return &trigger_node->params;
+}
+
+static int
+mlxsw_sp_sample_trigger_node_init(struct mlxsw_sp *mlxsw_sp,
+				  const struct mlxsw_sp_sample_trigger *trigger,
+				  const struct mlxsw_sp_sample_params *params)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	int err;
+
+	trigger_node = kzalloc(sizeof(*trigger_node), GFP_KERNEL);
+	if (!trigger_node)
+		return -ENOMEM;
+
+	trigger_node->trigger = *trigger;
+	trigger_node->params = *params;
+	refcount_set(&trigger_node->refcount, 1);
+
+	err = rhashtable_insert_fast(&mlxsw_sp->sample_trigger_ht,
+				     &trigger_node->ht_node,
+				     mlxsw_sp_sample_trigger_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
+	return 0;
+
+err_rhashtable_insert:
+	kfree(trigger_node);
+	return err;
+}
+
+static void
+mlxsw_sp_sample_trigger_node_fini(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_sample_trigger_node *trigger_node)
+{
+	rhashtable_remove_fast(&mlxsw_sp->sample_trigger_ht,
+			       &trigger_node->ht_node,
+			       mlxsw_sp_sample_trigger_ht_params);
+	kfree_rcu(trigger_node, rcu);
+}
+
+int
+mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_sample_trigger *trigger,
+				   const struct mlxsw_sp_sample_params *params,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	ASSERT_RTNL();
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+
+	trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht,
+					      &key,
+					      mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key,
+							 params);
+
+	if (trigger_node->params.psample_group != params->psample_group ||
+	    trigger_node->params.truncate != params->truncate ||
+	    trigger_node->params.rate != params->rate ||
+	    trigger_node->params.trunc_size != params->trunc_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Sampling parameters do not match for an existing sampling trigger");
+		return -EINVAL;
+	}
+
+	refcount_inc(&trigger_node->refcount);
+
+	return 0;
+}
+
+void
+mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
+				     const struct mlxsw_sp_sample_trigger *trigger)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	ASSERT_RTNL();
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+
+	trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht,
+					      &key,
+					      mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return;
+
+	if (!refcount_dec_and_test(&trigger_node->refcount))
+		return;
+
+	mlxsw_sp_sample_trigger_node_fini(mlxsw_sp, trigger_node);
+}
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 				    unsigned long event, void *ptr);
 
@@ -2730,6 +2858,13 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_port_module_info_init;
 	}
 
+	err = rhashtable_init(&mlxsw_sp->sample_trigger_ht,
+			      &mlxsw_sp_sample_trigger_ht_params);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to init sampling trigger hashtable\n");
+		goto err_sample_trigger_init;
+	}
+
 	err = mlxsw_sp_ports_create(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
@@ -2739,6 +2874,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	return 0;
 
 err_ports_create:
+	rhashtable_destroy(&mlxsw_sp->sample_trigger_ht);
+err_sample_trigger_init:
 	mlxsw_sp_port_module_info_fini(mlxsw_sp);
 err_port_module_info_init:
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
@@ -2788,6 +2925,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp1_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
@@ -2796,7 +2934,6 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp1_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
@@ -2804,6 +2941,8 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp1_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp1_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp1_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp1_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp1_router_ops;
 	mlxsw_sp->listeners = mlxsw_sp1_listener;
 	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
@@ -2817,6 +2956,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
@@ -2825,7 +2965,6 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
@@ -2833,6 +2972,8 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -2844,6 +2985,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
@@ -2852,7 +2994,6 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
@@ -2860,6 +3001,8 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp3_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -2870,6 +3013,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
+	rhashtable_destroy(&mlxsw_sp->sample_trigger_ht);
 	mlxsw_sp_port_module_info_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
@@ -4283,7 +4427,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
 		if (br_vlan_enabled(br_dev)) {
 			br_vlan_get_proto(br_dev, &proto);
 			if (proto == ETH_P_8021AD) {
-				NL_SET_ERR_MSG_MOD(extack, "Uppers are not supported on top of an 802.1ad bridge");
+				NL_SET_ERR_MSG_MOD(extack, "Upper devices are not supported on top of an 802.1ad bridge");
 				return -EOPNOTSUPP;
 			}
 		}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d9d9e1f488f9..97d074d7b78d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -16,6 +16,7 @@
 #include <linux/in6.h>
 #include <linux/notifier.h>
 #include <linux/net_namespace.h>
+#include <linux/spinlock.h>
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
@@ -86,10 +87,15 @@ enum mlxsw_sp_rif_type {
 	MLXSW_SP_RIF_TYPE_MAX,
 };
 
-struct mlxsw_sp_rif_ops;
+struct mlxsw_sp_router_ops;
 
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
+extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops;
+extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops;
+
+struct mlxsw_sp_switchdev_ops;
+
+extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops;
+extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops;
 
 enum mlxsw_sp_fid_type {
 	MLXSW_SP_FID_TYPE_8021Q,
@@ -133,6 +139,7 @@ struct mlxsw_sp_ptp_state;
 struct mlxsw_sp_ptp_ops;
 struct mlxsw_sp_span_ops;
 struct mlxsw_sp_qdisc_state;
+struct mlxsw_sp_mall_entry;
 
 struct mlxsw_sp_port_mapping {
 	u8 module;
@@ -148,6 +155,7 @@ struct mlxsw_sp {
 	const unsigned char *mac_mask;
 	struct mlxsw_sp_upper *lags;
 	struct mlxsw_sp_port_mapping **port_mapping;
+	struct rhashtable sample_trigger_ht;
 	struct mlxsw_sp_sb *sb;
 	struct mlxsw_sp_bridge *bridge;
 	struct mlxsw_sp_router *router;
@@ -164,6 +172,7 @@ struct mlxsw_sp {
 	struct mlxsw_sp_counter_pool *counter_pool;
 	struct mlxsw_sp_span *span;
 	struct mlxsw_sp_trap *trap;
+	const struct mlxsw_sp_switchdev_ops *switchdev_ops;
 	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
 	const struct mlxsw_afa_ops *afa_ops;
 	const struct mlxsw_afk_ops *afk_ops;
@@ -171,7 +180,6 @@ struct mlxsw_sp {
 	const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops;
 	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 	const struct mlxsw_sp_nve_ops **nve_ops_arr;
-	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 	const struct mlxsw_sp_sb_vals *sb_vals;
 	const struct mlxsw_sp_sb_ops *sb_ops;
 	const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
@@ -179,6 +187,8 @@ struct mlxsw_sp {
 	const struct mlxsw_sp_span_ops *span_ops;
 	const struct mlxsw_sp_policer_core_ops *policer_core_ops;
 	const struct mlxsw_sp_trap_ops *trap_ops;
+	const struct mlxsw_sp_mall_ops *mall_ops;
+	const struct mlxsw_sp_router_ops *router_ops;
 	const struct mlxsw_listener *listeners;
 	size_t listeners_count;
 	u32 lowest_shaper_bs;
@@ -232,7 +242,18 @@ struct mlxsw_sp_port_pcpu_stats {
 	u32			tx_dropped;
 };
 
-struct mlxsw_sp_port_sample {
+enum mlxsw_sp_sample_trigger_type {
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS,
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS,
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+};
+
+struct mlxsw_sp_sample_trigger {
+	enum mlxsw_sp_sample_trigger_type type;
+	u8 local_port; /* Reserved when trigger type is not ingress / egress. */
+};
+
+struct mlxsw_sp_sample_params {
 	struct psample_group *psample_group;
 	u32 trunc_size;
 	u32 rate;
@@ -302,7 +323,6 @@ struct mlxsw_sp_port {
 		struct mlxsw_sp_port_xstats xstats;
 		struct delayed_work update_dw;
 	} periodic_hw_stats;
-	struct mlxsw_sp_port_sample __rcu *sample;
 	struct list_head vlans_list;
 	struct mlxsw_sp_port_vlan *default_vlan;
 	struct mlxsw_sp_qdisc_state *qdisc;
@@ -531,6 +551,17 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port,
 				      struct mlxsw_sp_hdroom *hdroom);
 int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port,
 			      const struct mlxsw_sp_hdroom *hdroom);
+struct mlxsw_sp_sample_params *
+mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_sp_sample_trigger *trigger);
+int
+mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_sample_trigger *trigger,
+				   const struct mlxsw_sp_sample_params *params,
+				   struct netlink_ext_ack *extack);
+void
+mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
+				     const struct mlxsw_sp_sample_trigger *trigger);
 
 extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals;
 extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;
@@ -568,8 +599,6 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
 				       u8 local_port, void *priv);
 void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 			  u8 local_port);
-void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
-			     u8 local_port);
 int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
@@ -586,6 +615,8 @@ int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable);
 int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
 				   bool learn_enable);
 int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type);
+int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 ethtype);
 int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
 			   u16 ethtype);
 struct mlxsw_sp_port_vlan *
@@ -924,6 +955,12 @@ int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
 int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_acl_rule_info *rulei,
 				   u16 fid, struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_rule_info *rulei,
+				  struct mlxsw_sp_flow_block *block,
+				  struct psample_group *psample_group, u32 rate,
+				  u32 trunc_size, bool truncate,
+				  struct netlink_ext_ack *extack);
 
 struct mlxsw_sp_acl_rule;
 
@@ -1033,6 +1070,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_matchall.c */
+struct mlxsw_sp_mall_ops {
+	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct mlxsw_sp_mall_entry *mall_entry,
+			  struct netlink_ext_ack *extack);
+	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct mlxsw_sp_mall_entry *mall_entry);
+};
+
+extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops;
+extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops;
+
 enum mlxsw_sp_mall_action_type {
 	MLXSW_SP_MALL_ACTION_TYPE_MIRROR,
 	MLXSW_SP_MALL_ACTION_TYPE_SAMPLE,
@@ -1048,6 +1098,11 @@ struct mlxsw_sp_mall_trap_entry {
 	int span_id;
 };
 
+struct mlxsw_sp_mall_sample_entry {
+	struct mlxsw_sp_sample_params params;
+	int span_id;	/* Relevant for Spectrum-2 onwards. */
+};
+
 struct mlxsw_sp_mall_entry {
 	struct list_head list;
 	unsigned long cookie;
@@ -1057,7 +1112,7 @@ struct mlxsw_sp_mall_entry {
 	union {
 		struct mlxsw_sp_mall_mirror_entry mirror;
 		struct mlxsw_sp_mall_trap_entry trap;
-		struct mlxsw_sp_port_sample sample;
+		struct mlxsw_sp_mall_sample_entry sample;
 	};
 	struct rcu_head rcu;
 };
@@ -1068,7 +1123,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 			   struct tc_cls_matchall_offload *f);
 int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port);
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct netlink_ext_ack *extack);
 void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
 			       struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 8cfa03a75374..67cedfa76f78 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -688,6 +688,31 @@ int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
 	return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack);
 }
 
+int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_rule_info *rulei,
+				  struct mlxsw_sp_flow_block *block,
+				  struct psample_group *psample_group, u32 rate,
+				  u32 trunc_size, bool truncate,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_flow_block_binding *binding;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	if (!list_is_singular(&block->binding_list)) {
+		NL_SET_ERR_MSG_MOD(extack, "Only a single sampling source is allowed");
+		return -EOPNOTSUPP;
+	}
+	binding = list_first_entry(&block->binding_list,
+				   struct mlxsw_sp_flow_block_binding, list);
+	mlxsw_sp_port = binding->mlxsw_sp_port;
+
+	return mlxsw_afa_block_append_sampler(rulei->act_block,
+					      mlxsw_sp_port->local_port,
+					      psample_group, rate, trunc_size,
+					      truncate, binding->ingress,
+					      extack);
+}
+
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 90372d1c28d4..c72aa38424dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -192,6 +192,22 @@ static void mlxsw_sp_act_policer_del(void *priv, u16 policer_index)
 			     policer_index);
 }
 
+static int mlxsw_sp1_act_sampler_add(void *priv, u8 local_port,
+				     struct psample_group *psample_group,
+				     u32 rate, u32 trunc_size, bool truncate,
+				     bool ingress, int *p_span_id,
+				     struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Sampling action is not supported on Spectrum-1");
+	return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp1_act_sampler_del(void *priv, u8 local_port, int span_id,
+				      bool ingress)
+{
+	WARN_ON_ONCE(1);
+}
+
 const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
 	.kvdl_set_add		= mlxsw_sp1_act_kvdl_set_add,
 	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
@@ -204,8 +220,73 @@ const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
 	.mirror_del		= mlxsw_sp_act_mirror_del,
 	.policer_add		= mlxsw_sp_act_policer_add,
 	.policer_del		= mlxsw_sp_act_policer_del,
+	.sampler_add		= mlxsw_sp1_act_sampler_add,
+	.sampler_del		= mlxsw_sp1_act_sampler_del,
 };
 
+static int mlxsw_sp2_act_sampler_add(void *priv, u8 local_port,
+				     struct psample_group *psample_group,
+				     u32 rate, u32 trunc_size, bool truncate,
+				     bool ingress, int *p_span_id,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
+	};
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_sample_params params;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp = priv;
+	int err;
+
+	params.psample_group = psample_group;
+	params.trunc_size = trunc_size;
+	params.rate = rate;
+	params.truncate = truncate;
+	err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, &params,
+						 extack);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, p_span_id, &agent_parms);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to get SPAN agent");
+		goto err_span_agent_get;
+	}
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to get analyzed port");
+		goto err_analyzed_port_get;
+	}
+
+	return 0;
+
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id);
+err_span_agent_get:
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
+	return err;
+}
+
+static void mlxsw_sp2_act_sampler_del(void *priv, u8 local_port, int span_id,
+				      bool ingress)
+{
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress);
+	mlxsw_sp_span_agent_put(mlxsw_sp, span_id);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
+}
+
 const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
 	.kvdl_set_add		= mlxsw_sp2_act_kvdl_set_add,
 	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
@@ -218,6 +299,8 @@ const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
 	.mirror_del		= mlxsw_sp_act_mirror_del,
 	.policer_add		= mlxsw_sp_act_policer_add,
 	.policer_del		= mlxsw_sp_act_policer_del,
+	.sampler_add		= mlxsw_sp2_act_sampler_add,
+	.sampler_del		= mlxsw_sp2_act_sampler_del,
 	.dummy_first_set	= true,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index ed81d4fa48ac..1a2fef2a5379 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -912,9 +912,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
 	u64 size = 0;
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
-		if (mlxsw_sp_nexthop_offload(nh) &&
-		    !mlxsw_sp_nexthop_group_has_ipip(nh) &&
-		    !mlxsw_sp_nexthop_is_discard(nh))
+		if (mlxsw_sp_nexthop_is_forward(nh) &&
+		    !mlxsw_sp_nexthop_group_has_ipip(nh))
 			size++;
 	return size;
 }
@@ -1105,9 +1104,8 @@ start_again:
 	nh_skip = nh_count;
 	nh_count = 0;
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
-		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
-		    mlxsw_sp_nexthop_is_discard(nh))
+		if (!mlxsw_sp_nexthop_is_forward(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
 			continue;
 
 		if (nh_count < nh_skip)
@@ -1180,6 +1178,7 @@ out:
 
 static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 {
+	char ratr_pl[MLXSW_REG_RATR_LEN];
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_nexthop *nh;
 	u32 adj_hash_index = 0;
@@ -1187,9 +1186,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 	u32 adj_size = 0;
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
-		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
-		    mlxsw_sp_nexthop_is_discard(nh))
+		if (!mlxsw_sp_nexthop_is_forward(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
 			continue;
 
 		mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
@@ -1198,8 +1196,9 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 			mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 		else
 			mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
-		mlxsw_sp_nexthop_update(mlxsw_sp,
-					adj_index + adj_hash_index, nh);
+		mlxsw_sp_nexthop_eth_update(mlxsw_sp,
+					    adj_index + adj_hash_index, nh,
+					    true, ratr_pl);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index 0456cda33808..9e50c823a354 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -71,7 +71,7 @@ static int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 
-	err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port);
+	err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port, extack);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 41855e58564b..be3791ca6069 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -24,6 +24,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 	const struct flow_action_entry *act;
 	int mirror_act_count = 0;
 	int police_act_count = 0;
+	int sample_act_count = 0;
 	int err, i;
 
 	if (!flow_action_has_entries(flow_action))
@@ -190,6 +191,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return -EOPNOTSUPP;
 			}
 
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
+
 			/* The kernel might adjust the requested burst size so
 			 * that it is not exactly a power of two. Re-adjust it
 			 * here since the hardware only supports burst sizes
@@ -204,6 +210,23 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return err;
 			break;
 			}
+		case FLOW_ACTION_SAMPLE: {
+			if (sample_act_count++) {
+				NL_SET_ERR_MSG_MOD(extack, "Multiple sample actions per rule are not supported");
+				return -EOPNOTSUPP;
+			}
+
+			err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei,
+							    block,
+							    act->sample.psample_group,
+							    act->sample.rate,
+							    act->sample.trunc_size,
+							    act->sample.truncate,
+							    extack);
+			if (err)
+				return err;
+			break;
+			}
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
 			dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 6ccca39bae84..b8b08a6a1d10 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -127,14 +127,16 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
 
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				  struct mlxsw_sp_ipip_entry *ipip_entry)
+				  struct mlxsw_sp_ipip_entry *ipip_entry,
+				  bool force, char *ratr_pl)
 {
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
 	__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
-	char ratr_pl[MLXSW_REG_RATR_LEN];
+	enum mlxsw_reg_ratr_op op;
 
-	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-			    true, MLXSW_REG_RATR_TYPE_IPIP,
+	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
+		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
+	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP,
 			    adj_index, rif_index);
 	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 87bef9880e5e..f0837b42d1d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -40,7 +40,8 @@ struct mlxsw_sp_ipip_ops {
 	enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
 
 	int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			      struct mlxsw_sp_ipip_entry *ipip_entry);
+			      struct mlxsw_sp_ipip_entry *ipip_entry,
+			      bool force, char *ratr_pl);
 
 	bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
 			    const struct net_device *ol_dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index f30599ad6019..ce58a795c6fc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -24,7 +24,8 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie
 
 static int
 mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_entry *mall_entry)
+			      struct mlxsw_sp_mall_entry *mall_entry,
+			      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_span_agent_parms agent_parms = {};
@@ -33,28 +34,35 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	int err;
 
 	if (!mall_entry->mirror.to_dev) {
-		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
+		NL_SET_ERR_MSG(extack, "Could not find requested device");
 		return -EINVAL;
 	}
 
 	agent_parms.to_dev = mall_entry->mirror.to_dev;
 	err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->mirror.span_id,
 				      &agent_parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get SPAN agent");
 		return err;
+	}
 
 	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port,
 					      mall_entry->ingress);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get analyzed port");
 		goto err_analyzed_port_get;
+	}
 
 	trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
 					MLXSW_SP_SPAN_TRIGGER_EGRESS;
 	parms.span_id = mall_entry->mirror.span_id;
+	parms.probability_rate = 1;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port,
 				       &parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent");
 		goto err_agent_bind;
+	}
 
 	return 0;
 
@@ -93,46 +101,64 @@ static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_entry *mall_entry)
+			      struct mlxsw_sp_mall_entry *mall_entry,
+			      struct netlink_ext_ack *extack)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_sample_trigger trigger;
 	int err;
 
-	if (rtnl_dereference(mlxsw_sp_port->sample)) {
-		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
-		return -EEXIST;
-	}
-	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
+	if (mall_entry->ingress)
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	else
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port->local_port;
+	err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger,
+						 &mall_entry->sample.params,
+						 extack);
+	if (err)
+		return err;
 
-	err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true,
-					    mall_entry->sample.rate);
+	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
+					     mall_entry, extack);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
 
 err_port_sample_set:
-	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
 	return err;
 }
 
 static void
-mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
+mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_mall_entry *mall_entry)
 {
-	if (!mlxsw_sp_port->sample)
-		return;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_sample_trigger trigger;
 
-	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
-	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
+	if (mall_entry->ingress)
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	else
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port->local_port;
+
+	mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port, mall_entry);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
 }
 
 static int
 mlxsw_sp_mall_port_rule_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			    struct mlxsw_sp_mall_entry *mall_entry)
+			    struct mlxsw_sp_mall_entry *mall_entry,
+			    struct netlink_ext_ack *extack)
 {
 	switch (mall_entry->type) {
 	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
-		return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry);
+		return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry,
+						     extack);
 	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
-		return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry);
+		return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry,
+						     extack);
 	default:
 		WARN_ON(1);
 		return -EINVAL;
@@ -148,7 +174,7 @@ mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port,
 		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry);
 		break;
 	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
-		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
+		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port, mall_entry);
 		break;
 	default:
 		WARN_ON(1);
@@ -238,27 +264,17 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		mall_entry->mirror.to_dev = act->dev;
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
-		if (!mall_entry->ingress) {
-			NL_SET_ERR_MSG(f->common.extack, "Sample is not supported on egress");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		if (flower_prio_valid &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
-		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-			NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
-		mall_entry->sample.psample_group = act->sample.psample_group;
-		mall_entry->sample.truncate = act->sample.truncate;
-		mall_entry->sample.trunc_size = act->sample.trunc_size;
-		mall_entry->sample.rate = act->sample.rate;
+		mall_entry->sample.params.psample_group = act->sample.psample_group;
+		mall_entry->sample.params.truncate = act->sample.truncate;
+		mall_entry->sample.params.trunc_size = act->sample.trunc_size;
+		mall_entry->sample.params.rate = act->sample.rate;
 	} else {
 		err = -EOPNOTSUPP;
 		goto errout;
@@ -266,7 +282,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 
 	list_for_each_entry(binding, &block->binding_list, list) {
 		err = mlxsw_sp_mall_port_rule_add(binding->mlxsw_sp_port,
-						  mall_entry);
+						  mall_entry, f->common.extack);
 		if (err)
 			goto rollback;
 	}
@@ -314,13 +330,15 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 }
 
 int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port)
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_mall_entry *mall_entry;
 	int err;
 
 	list_for_each_entry(mall_entry, &block->mall.list, list) {
-		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
+		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry,
+						  extack);
 		if (err)
 			goto rollback;
 	}
@@ -355,3 +373,104 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
 	*p_max_prio = block->mall.max_prio;
 	return 0;
 }
+
+static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_port *mlxsw_sp_port,
+				     struct mlxsw_sp_mall_entry *mall_entry,
+				     struct netlink_ext_ack *extack)
+{
+	u32 rate = mall_entry->sample.params.rate;
+
+	if (!mall_entry->ingress) {
+		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
+		return -EOPNOTSUPP;
+	}
+
+	if (rate > MLXSW_REG_MPSC_RATE_MAX) {
+		NL_SET_ERR_MSG(extack, "Unsupported sampling rate");
+		return -EOPNOTSUPP;
+	}
+
+	return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate);
+}
+
+static void mlxsw_sp1_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct mlxsw_sp_mall_entry *mall_entry)
+{
+	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
+}
+
+const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
+	.sample_add = mlxsw_sp1_mall_sample_add,
+	.sample_del = mlxsw_sp1_mall_sample_del,
+};
+
+static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_port *mlxsw_sp_port,
+				     struct mlxsw_sp_mall_entry *mall_entry,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.to_dev = NULL,	/* Mirror to CPU. */
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
+	};
+	u32 rate = mall_entry->sample.params.rate;
+	enum mlxsw_sp_span_trigger span_trigger;
+	int err;
+
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->sample.span_id,
+				      &agent_parms);
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get SPAN agent");
+		return err;
+	}
+
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port,
+					      mall_entry->ingress);
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get analyzed port");
+		goto err_analyzed_port_get;
+	}
+
+	span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					     MLXSW_SP_SPAN_TRIGGER_EGRESS;
+	trigger_parms.span_id = mall_entry->sample.span_id;
+	trigger_parms.probability_rate = rate;
+	err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port,
+				       &trigger_parms);
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent");
+		goto err_agent_bind;
+	}
+
+	return 0;
+
+err_agent_bind:
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
+	return err;
+}
+
+static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct mlxsw_sp_mall_entry *mall_entry)
+{
+	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
+	enum mlxsw_sp_span_trigger span_trigger;
+
+	span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					     MLXSW_SP_SPAN_TRIGGER_EGRESS;
+	trigger_parms.span_id = mall_entry->sample.span_id;
+	mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port,
+				   &trigger_parms);
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
+}
+
+const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = {
+	.sample_add = mlxsw_sp2_mall_sample_add,
+	.sample_del = mlxsw_sp2_mall_sample_del,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
index 2796d3659979..d8104fc6c900 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -18,7 +18,6 @@ struct mlxsw_sp_nve_config {
 	u32 ul_tb_id;
 	enum mlxsw_sp_l3proto ul_proto;
 	union mlxsw_sp_l3addr ul_sip;
-	u16 ethertype;
 };
 
 struct mlxsw_sp_nve {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index 3e2bb22e9ca6..b84bb4b65098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -113,7 +113,6 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
 	config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
 	config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
 	config->udp_dport = cfg->dst_port;
-	config->ethertype = params->ethertype;
 }
 
 static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
@@ -318,20 +317,14 @@ static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
-mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp, u16 ethertype)
+mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp)
 {
 	char spvid_pl[MLXSW_REG_SPVID_LEN] = {};
-	u8 sver_type;
-	int err;
 
 	mlxsw_reg_spvid_tport_set(spvid_pl, true);
 	mlxsw_reg_spvid_local_port_set(spvid_pl,
 				       MLXSW_REG_TUNNEL_PORT_NVE);
-	err = mlxsw_sp_ethtype_to_sver_type(ethertype, &sver_type);
-	if (err)
-		return err;
-
-	mlxsw_reg_spvid_et_vlan_set(spvid_pl, sver_type);
+	mlxsw_reg_spvid_egr_et_set_set(spvid_pl, true);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
 }
 
@@ -367,7 +360,7 @@ mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_spvtr_write;
 
-	err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, config->ethertype);
+	err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp);
 	if (err)
 		goto err_decap_ethertype_set;
 
@@ -392,8 +385,6 @@ static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
 	char spvtr_pl[MLXSW_REG_SPVTR_LEN];
 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
 
-	/* Set default EtherType */
-	mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, ETH_P_8021Q);
 	mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE,
 			     MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index fd672c6c9133..baf17c0b2702 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -1341,6 +1341,7 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp,
 		goto err_analyzed_port_get;
 
 	trigger_parms.span_id = span_id;
+	trigger_parms.probability_rate = 1;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port,
 				       &trigger_parms);
 	if (err)
@@ -1404,7 +1405,9 @@ static int mlxsw_sp_qevent_trap_configure(struct mlxsw_sp *mlxsw_sp,
 					  struct mlxsw_sp_mall_entry *mall_entry,
 					  struct mlxsw_sp_qevent_binding *qevent_binding)
 {
-	struct mlxsw_sp_span_agent_parms agent_parms = {};
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_BUFFER,
+	};
 	int err;
 
 	err = mlxsw_sp_trap_group_policer_hw_id_get(mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index eda99d82766a..6ccaa194733b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -113,6 +113,10 @@ struct mlxsw_sp_rif_ops {
 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 };
 
+struct mlxsw_sp_router_ops {
+	int (*init)(struct mlxsw_sp *mlxsw_sp);
+};
+
 static struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
 			 const struct net_device *dev);
@@ -2662,6 +2666,10 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
 			goto out;
 	}
 
+	if (neigh_entry->connected && entry_connected &&
+	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
+		goto out;
+
 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
@@ -2842,6 +2850,15 @@ enum mlxsw_sp_nexthop_type {
 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
 };
 
+enum mlxsw_sp_nexthop_action {
+	/* Nexthop forwards packets to an egress RIF */
+	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
+	/* Nexthop discards packets */
+	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
+	/* Nexthop traps packets */
+	MLXSW_SP_NEXTHOP_ACTION_TRAP,
+};
+
 struct mlxsw_sp_nexthop_key {
 	struct fib_nh *fib_nh;
 };
@@ -2862,16 +2879,16 @@ struct mlxsw_sp_nexthop {
 	int norm_nh_weight;
 	int num_adj_entries;
 	struct mlxsw_sp_rif *rif;
-	u8 should_offload:1, /* set indicates this neigh is connected and
-			      * should be put to KVD linear area of this group.
+	u8 should_offload:1, /* set indicates this nexthop should be written
+			      * to the adjacency table.
 			      */
-	   offloaded:1, /* set in case the neigh is actually put into
-			 * KVD linear area of this group.
+	   offloaded:1, /* set indicates this nexthop was written to the
+			 * adjacency table.
 			 */
-	   update:1, /* set indicates that MAC of this neigh should be
-		      * updated in HW
+	   update:1; /* set indicates this nexthop should be updated in the
+		      * adjacency table (f.e., its MAC changed).
 		      */
-	   discard:1; /* nexthop is programmed to discard packets */
+	enum mlxsw_sp_nexthop_action action;
 	enum mlxsw_sp_nexthop_type type;
 	union {
 		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -2894,7 +2911,9 @@ struct mlxsw_sp_nexthop_group_info {
 	u16 count;
 	int sum_norm_weight;
 	u8 adj_index_valid:1,
-	   gateway:1; /* routes using the group use a gateway */
+	   gateway:1, /* routes using the group use a gateway */
+	   is_resilient:1;
+	struct list_head list; /* member in nh_res_grp_list */
 	struct mlxsw_sp_nexthop nexthops[0];
 #define nh_rif	nexthops[0].rif
 };
@@ -2979,14 +2998,15 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
 	return list_next_entry(nh, router_list_node);
 }
 
-bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
+bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
 {
-	return nh->offloaded;
+	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
 }
 
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
 {
-	if (!nh->offloaded)
+	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
+	    !mlxsw_sp_nexthop_is_forward(nh))
 		return NULL;
 	return nh->neigh_entry->ha;
 }
@@ -3036,11 +3056,6 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
 	return false;
 }
 
-bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
-{
-	return nh->discard;
-}
-
 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
@@ -3403,20 +3418,38 @@ err_mass_update_vr:
 	return err;
 }
 
-static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				     struct mlxsw_sp_nexthop *nh)
+static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
+					 u32 adj_index,
+					 struct mlxsw_sp_nexthop *nh,
+					 bool force, char *ratr_pl)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
-	char ratr_pl[MLXSW_REG_RATR_LEN];
+	enum mlxsw_reg_ratr_op op;
+	u16 rif_index;
 
-	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
-			    adj_index, nh->rif->rif_index);
-	if (nh->discard)
+	rif_index = nh->rif ? nh->rif->rif_index :
+			      mlxsw_sp->router->lb_rif_index;
+	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
+		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
+	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
+			    adj_index, rif_index);
+	switch (nh->action) {
+	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
+		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+		break;
+	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
-	else
-		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+		break;
+	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
+		mlxsw_reg_ratr_trap_action_set(ratr_pl,
+					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
+		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
 	if (nh->counter_valid)
 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
 	else
@@ -3425,15 +3458,17 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
 }
 
-int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			    struct mlxsw_sp_nexthop *nh)
+int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				struct mlxsw_sp_nexthop *nh, bool force,
+				char *ratr_pl)
 {
 	int i;
 
 	for (i = 0; i < nh->num_adj_entries; i++) {
 		int err;
 
-		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
+		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
+						    nh, force, ratr_pl);
 		if (err)
 			return err;
 	}
@@ -3443,17 +3478,20 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 
 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					  u32 adj_index,
-					  struct mlxsw_sp_nexthop *nh)
+					  struct mlxsw_sp_nexthop *nh,
+					  bool force, char *ratr_pl)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 
 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
-	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
+	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
+					force, ratr_pl);
 }
 
 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					u32 adj_index,
-					struct mlxsw_sp_nexthop *nh)
+					struct mlxsw_sp_nexthop *nh, bool force,
+					char *ratr_pl)
 {
 	int i;
 
@@ -3461,7 +3499,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 		int err;
 
 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
-						     nh);
+						     nh, force, ratr_pl);
 		if (err)
 			return err;
 	}
@@ -3469,11 +3507,29 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				   struct mlxsw_sp_nexthop *nh, bool force,
+				   char *ratr_pl)
+{
+	/* When action is discard or trap, the nexthop must be
+	 * programmed as an Ethernet nexthop.
+	 */
+	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
+	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
+	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
+		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
+						   force, ratr_pl);
+	else
+		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
+						    force, ratr_pl);
+}
+
 static int
 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_nexthop_group_info *nhgi,
 			      bool reallocate)
 {
+	char ratr_pl[MLXSW_REG_RATR_LEN];
 	u32 adj_index = nhgi->adj_index; /* base */
 	struct mlxsw_sp_nexthop *nh;
 	int i;
@@ -3489,16 +3545,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 		if (nh->update || reallocate) {
 			int err = 0;
 
-			switch (nh->type) {
-			case MLXSW_SP_NEXTHOP_TYPE_ETH:
-				err = mlxsw_sp_nexthop_update
-					    (mlxsw_sp, adj_index, nh);
-				break;
-			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
-				err = mlxsw_sp_nexthop_ipip_update
-					    (mlxsw_sp, adj_index, nh);
-				break;
-			}
+			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
+						      true, ratr_pl);
 			if (err)
 				return err;
 			nh->update = 0;
@@ -3524,34 +3572,69 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
+struct mlxsw_sp_adj_grp_size_range {
+	u16 start; /* Inclusive */
+	u16 end; /* Inclusive */
+};
+
+/* Ordered by range start value */
+static const struct mlxsw_sp_adj_grp_size_range
+mlxsw_sp1_adj_grp_size_ranges[] = {
+	{ .start = 1, .end = 64 },
+	{ .start = 512, .end = 512 },
+	{ .start = 1024, .end = 1024 },
+	{ .start = 2048, .end = 2048 },
+	{ .start = 4096, .end = 4096 },
+};
+
+/* Ordered by range start value */
+static const struct mlxsw_sp_adj_grp_size_range
+mlxsw_sp2_adj_grp_size_ranges[] = {
+	{ .start = 1, .end = 128 },
+	{ .start = 256, .end = 256 },
+	{ .start = 512, .end = 512 },
+	{ .start = 1024, .end = 1024 },
+	{ .start = 2048, .end = 2048 },
+	{ .start = 4096, .end = 4096 },
+};
+
+static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
+					   u16 *p_adj_grp_size)
 {
-	/* Valid sizes for an adjacency group are:
-	 * 1-64, 512, 1024, 2048 and 4096.
-	 */
-	if (*p_adj_grp_size <= 64)
-		return;
-	else if (*p_adj_grp_size <= 512)
-		*p_adj_grp_size = 512;
-	else if (*p_adj_grp_size <= 1024)
-		*p_adj_grp_size = 1024;
-	else if (*p_adj_grp_size <= 2048)
-		*p_adj_grp_size = 2048;
-	else
-		*p_adj_grp_size = 4096;
+	int i;
+
+	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
+
+		if (*p_adj_grp_size >= size_range->start &&
+		    *p_adj_grp_size <= size_range->end)
+			return;
+
+		if (*p_adj_grp_size <= size_range->end) {
+			*p_adj_grp_size = size_range->end;
+			return;
+		}
+	}
 }
 
-static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
+static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
+					     u16 *p_adj_grp_size,
 					     unsigned int alloc_size)
 {
-	if (alloc_size >= 4096)
-		*p_adj_grp_size = 4096;
-	else if (alloc_size >= 2048)
-		*p_adj_grp_size = 2048;
-	else if (alloc_size >= 1024)
-		*p_adj_grp_size = 1024;
-	else if (alloc_size >= 512)
-		*p_adj_grp_size = 512;
+	int i;
+
+	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
+
+		if (alloc_size >= size_range->end) {
+			*p_adj_grp_size = size_range->end;
+			return;
+		}
+	}
 }
 
 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
@@ -3563,7 +3646,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	/* Round up the requested group size to the next size supported
 	 * by the device and make sure the request can be satisfied.
 	 */
-	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
+	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
 					      *p_adj_grp_size, &alloc_size);
@@ -3573,7 +3656,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	 * entries than requested. Try to use as much of them as
 	 * possible.
 	 */
-	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
+	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
 
 	return 0;
 }
@@ -3681,9 +3764,29 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void
+mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
+					const struct mlxsw_sp_nexthop *nh,
+					u16 bucket_index)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+	bool offload = false, trap = false;
+
+	if (nh->offloaded) {
+		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
+			trap = true;
+		else
+			offload = true;
+	}
+	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
+				    bucket_index, offload, trap);
+}
+
+static void
 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 					   struct mlxsw_sp_nexthop_group *nh_grp)
 {
+	int i;
+
 	/* Do not update the flags if the nexthop group is being destroyed
 	 * since:
 	 * 1. The nexthop objects is being deleted, in which case the flags are
@@ -3697,6 +3800,18 @@ mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 
 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
 			     nh_grp->nhgi->adj_index_valid, false);
+
+	/* Update flags of individual nexthop buckets in case of a resilient
+	 * nexthop group.
+	 */
+	if (!nh_grp->nhgi->is_resilient)
+		return;
+
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
+
+		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
+	}
 }
 
 static void
@@ -3750,6 +3865,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
 			goto set_trap;
 		}
+		/* Flags of individual nexthop buckets might need to be
+		 * updated.
+		 */
+		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
 		return 0;
 	}
 	mlxsw_sp_nexthop_group_normalize(nhgi);
@@ -3832,10 +3951,15 @@ set_trap:
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 					    bool removing)
 {
-	if (!removing)
+	if (!removing) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
 		nh->should_offload = 1;
-	else
+	} else if (nh->nhgi->is_resilient) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
+		nh->should_offload = 1;
+	} else {
 		nh->should_offload = 0;
+	}
 	nh->update = 1;
 }
 
@@ -4250,6 +4374,85 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+static void
+mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
+			     const struct mlxsw_sp_nexthop_group *nh_grp,
+			     unsigned long *activity)
+{
+	char *ratrad_pl;
+	int i, err;
+
+	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
+	if (!ratrad_pl)
+		return;
+
+	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
+			      nh_grp->nhgi->count);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
+	if (err)
+		goto out;
+
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
+			continue;
+		bitmap_set(activity, i, 1);
+	}
+
+out:
+	kfree(ratrad_pl);
+}
+
+#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
+
+static void
+mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
+				const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	unsigned long *activity;
+
+	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
+	if (!activity)
+		return;
+
+	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
+	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
+					nh_grp->nhgi->count, activity);
+
+	bitmap_free(activity);
+}
+
+static void
+mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
+
+	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
+			       msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
+{
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	struct mlxsw_sp_router *router;
+	bool reschedule = false;
+
+	router = container_of(work, struct mlxsw_sp_router,
+			      nh_grp_activity_dw.work);
+
+	mutex_lock(&router->lock);
+
+	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
+		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
+		reschedule = true;
+	}
+
+	mutex_unlock(&router->lock);
+
+	if (!reschedule)
+		return;
+	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
+}
+
 static int
 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 				     const struct nh_notifier_single_info *nh,
@@ -4268,6 +4471,29 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
+mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
+					  const struct nh_notifier_single_info *nh,
+					  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
+	if (err)
+		return err;
+
+	/* Device only nexthops with an IPIP device are programmed as
+	 * encapsulating adjacency entries.
+	 */
+	if (!nh->gw_family && !nh->is_reject &&
+	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 				    const struct nh_notifier_grp_info *nh_grp,
 				    struct netlink_ext_ack *extack)
@@ -4284,21 +4510,83 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 		int err;
 
 		nh = &nh_grp->nh_entries[i].nh;
-		err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh,
-							   extack);
+		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								extack);
 		if (err)
 			return err;
+	}
 
-		/* Device only nexthops with an IPIP device are programmed as
-		 * encapsulating adjacency entries.
-		 */
-		if (!nh->gw_family && !nh->is_reject &&
-		    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
-			NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
-			return -EINVAL;
+	return 0;
+}
+
+static int
+mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
+					     const struct nh_notifier_res_table_info *nh_res_table,
+					     struct netlink_ext_ack *extack)
+{
+	unsigned int alloc_size;
+	bool valid_size = false;
+	int err, i;
+
+	if (nh_res_table->num_nh_buckets < 32) {
+		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
+
+		if (nh_res_table->num_nh_buckets >= size_range->start &&
+		    nh_res_table->num_nh_buckets <= size_range->end) {
+			valid_size = true;
+			break;
 		}
 	}
 
+	if (!valid_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
+		return -EINVAL;
+	}
+
+	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+					      nh_res_table->num_nh_buckets,
+					      &alloc_size);
+	if (err || nh_res_table->num_nh_buckets != alloc_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
+					const struct nh_notifier_res_table_info *nh_res_table,
+					struct netlink_ext_ack *extack)
+{
+	int err;
+	u16 i;
+
+	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
+							   nh_res_table,
+							   extack);
+	if (err)
+		return err;
+
+	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
+		const struct nh_notifier_single_info *nh;
+		int err;
+
+		nh = &nh_res_table->nhs[i];
+		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								extack);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -4306,7 +4594,11 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
 					 unsigned long event,
 					 struct nh_notifier_info *info)
 {
-	if (event != NEXTHOP_EVENT_REPLACE)
+	struct nh_notifier_single_info *nh;
+
+	if (event != NEXTHOP_EVENT_REPLACE &&
+	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
+	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
 		return 0;
 
 	switch (info->type) {
@@ -4317,6 +4609,14 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
 							   info->nh_grp,
 							   info->extack);
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
+							       info->nh_res_table,
+							       info->extack);
+	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
+		nh = &info->nh_res_bucket->new_nh;
+		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								 info->extack);
 	default:
 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
 		return -EOPNOTSUPP;
@@ -4334,6 +4634,7 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
 		return info->nh->gw_family || info->nh->is_reject ||
 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
 	case NH_NOTIFIER_INFO_TYPE_GRP:
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
 		/* Already validated earlier. */
 		return true;
 	default:
@@ -4346,7 +4647,7 @@ static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
 {
 	u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
 
-	nh->discard = 1;
+	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
 	nh->should_offload = 1;
 	/* While nexthops that discard packets do not forward packets
 	 * via an egress RIF, they still need to be programmed using a
@@ -4398,6 +4699,15 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
 	if (nh_obj->is_reject)
 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
 
+	/* In a resilient nexthop group, all the nexthops must be written to
+	 * the adjacency table. Even if they do not have a valid neighbour or
+	 * RIF.
+	 */
+	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
+		nh->should_offload = 1;
+	}
+
 	return 0;
 
 err_type_init:
@@ -4409,11 +4719,12 @@ err_type_init:
 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_nexthop *nh)
 {
-	if (nh->discard)
+	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 	list_del(&nh->router_list_node);
 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+	nh->should_offload = 0;
 }
 
 static int
@@ -4423,6 +4734,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_nexthop_group_info *nhgi;
 	struct mlxsw_sp_nexthop *nh;
+	bool is_resilient = false;
 	unsigned int nhs;
 	int err, i;
 
@@ -4433,6 +4745,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 	case NH_NOTIFIER_INFO_TYPE_GRP:
 		nhs = info->nh_grp->num_nh;
 		break;
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		nhs = info->nh_res_table->num_nh_buckets;
+		is_resilient = true;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -4443,6 +4759,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->nhgi = nhgi;
 	nhgi->nh_grp = nh_grp;
 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
+	nhgi->is_resilient = is_resilient;
 	nhgi->count = nhs;
 	for (i = 0; i < nhgi->count; i++) {
 		struct nh_notifier_single_info *nh_obj;
@@ -4458,6 +4775,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 			nh_obj = &info->nh_grp->nh_entries[i].nh;
 			weight = info->nh_grp->nh_entries[i].weight;
 			break;
+		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+			nh_obj = &info->nh_res_table->nhs[i];
+			weight = 1;
+			break;
 		default:
 			err = -EINVAL;
 			goto err_nexthop_obj_init;
@@ -4473,6 +4794,15 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 		goto err_group_refresh;
 	}
 
+	/* Add resilient nexthop groups to a list so that the activity of their
+	 * nexthop buckets will be periodically queried and cleared.
+	 */
+	if (nhgi->is_resilient) {
+		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
+			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
+		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
+	}
+
 	return 0;
 
 err_group_refresh:
@@ -4491,8 +4821,15 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_nexthop_group *nh_grp)
 {
 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
+	struct mlxsw_sp_router *router = mlxsw_sp->router;
 	int i;
 
+	if (nhgi->is_resilient) {
+		list_del(&nhgi->list);
+		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
+			cancel_delayed_work(&router->nh_grp_activity_dw);
+	}
+
 	for (i = nhgi->count - 1; i >= 0; i--) {
 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
 
@@ -4685,6 +5022,135 @@ static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
 }
 
+static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
+					     u32 adj_index, char *ratr_pl)
+{
+	MLXSW_REG_ZERO(ratr, ratr_pl);
+	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
+	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
+
+	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
+{
+	/* Clear the opcode and activity on both the old and new payload as
+	 * they are irrelevant for the comparison.
+	 */
+	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_a_set(ratr_pl, 0);
+	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
+
+	/* If the contents of the adjacency entry are consistent with the
+	 * replacement request, then replacement was successful.
+	 */
+	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
+		return 0;
+
+	return -EINVAL;
+}
+
+static int
+mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_nexthop *nh,
+				       struct nh_notifier_info *info)
+{
+	u16 bucket_index = info->nh_res_bucket->bucket_index;
+	struct netlink_ext_ack *extack = info->extack;
+	bool force = info->nh_res_bucket->force;
+	char ratr_pl_new[MLXSW_REG_RATR_LEN];
+	char ratr_pl[MLXSW_REG_RATR_LEN];
+	u32 adj_index;
+	int err;
+
+	/* No point in trying an atomic replacement if the idle timer interval
+	 * is smaller than the interval in which we query and clear activity.
+	 */
+	force = info->nh_res_bucket->idle_timer_ms <
+		MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
+
+	adj_index = nh->nhgi->adj_index + bucket_index;
+	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
+		return err;
+	}
+
+	if (!force) {
+		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
+							ratr_pl_new);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
+			return err;
+		}
+
+		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
+			return err;
+		}
+	}
+
+	nh->update = 0;
+	nh->offloaded = 1;
+	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
+
+	return 0;
+}
+
+static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
+					       struct nh_notifier_info *info)
+{
+	u16 bucket_index = info->nh_res_bucket->bucket_index;
+	struct netlink_ext_ack *extack = info->extack;
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	struct nh_notifier_single_info *nh_obj;
+	struct mlxsw_sp_nexthop_group *nh_grp;
+	struct mlxsw_sp_nexthop *nh;
+	int err;
+
+	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+	if (!nh_grp) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
+		return -EINVAL;
+	}
+
+	nhgi = nh_grp->nhgi;
+
+	if (bucket_index >= nhgi->count) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
+		return -EINVAL;
+	}
+
+	nh = &nhgi->nexthops[bucket_index];
+	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
+
+	nh_obj = &info->nh_res_bucket->new_nh;
+	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
+		goto err_nexthop_obj_init;
+	}
+
+	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
+	if (err)
+		goto err_nexthop_obj_bucket_adj_update;
+
+	return 0;
+
+err_nexthop_obj_bucket_adj_update:
+	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
+err_nexthop_obj_init:
+	nh_obj = &info->nh_res_bucket->old_nh;
+	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
+	/* The old adjacency entry was not overwritten */
+	nh->update = 0;
+	nh->offloaded = 1;
+	return err;
+}
+
 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 				      unsigned long event, void *ptr)
 {
@@ -4699,8 +5165,6 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 
 	mutex_lock(&router->lock);
 
-	ASSERT_RTNL();
-
 	switch (event) {
 	case NEXTHOP_EVENT_REPLACE:
 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
@@ -4708,6 +5172,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 	case NEXTHOP_EVENT_DEL:
 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
 		break;
+	case NEXTHOP_EVENT_BUCKET_REPLACE:
+		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
+							  info);
+		break;
 	default:
 		break;
 	}
@@ -7667,7 +8135,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	int i, err;
 
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
-	ops = mlxsw_sp->rif_ops_arr[type];
+	ops = mlxsw_sp->router->rif_ops_arr[type];
 
 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 	if (IS_ERR(vr))
@@ -8865,7 +9333,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
 };
 
-const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
+static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
@@ -9050,7 +9518,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
 };
 
-const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
+static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
@@ -9302,6 +9770,36 @@ static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
 }
 
+static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
+
+	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
+	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
+	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
+
+	return 0;
+}
+
+const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
+	.init = mlxsw_sp1_router_init,
+};
+
+static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
+
+	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
+	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
+	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
+
+	return 0;
+}
+
+const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
+	.init = mlxsw_sp2_router_init,
+};
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 			 struct netlink_ext_ack *extack)
 {
@@ -9315,6 +9813,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp->router = router;
 	router->mlxsw_sp = mlxsw_sp;
 
+	err = mlxsw_sp->router_ops->init(mlxsw_sp);
+	if (err)
+		goto err_router_ops_init;
+
 	err = mlxsw_sp_router_xm_init(mlxsw_sp);
 	if (err)
 		goto err_xm_init;
@@ -9328,6 +9830,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_ll_op_ctx_init;
 
+	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
+	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
+			  mlxsw_sp_nh_grp_activity_work);
+
 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
 	err = __mlxsw_sp_router_init(mlxsw_sp);
 	if (err)
@@ -9451,10 +9957,12 @@ err_ipips_init:
 err_rifs_init:
 	__mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
 	mlxsw_sp_router_ll_op_ctx_fini(router);
 err_ll_op_ctx_init:
 	mlxsw_sp_router_xm_fini(mlxsw_sp);
 err_xm_init:
+err_router_ops_init:
 	mutex_destroy(&mlxsw_sp->router->lock);
 	kfree(mlxsw_sp->router);
 	return err;
@@ -9481,6 +9989,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_ipips_fini(mlxsw_sp);
 	mlxsw_sp_rifs_fini(mlxsw_sp);
 	__mlxsw_sp_router_fini(mlxsw_sp);
+	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
 	mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
 	mlxsw_sp_router_xm_fini(mlxsw_sp);
 	mutex_destroy(&mlxsw_sp->router->lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 2875ee8ec537..be7708a375e1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -78,6 +78,10 @@ struct mlxsw_sp_router {
 	struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
 	u16 lb_rif_index;
 	struct mlxsw_sp_router_xm *xm;
+	const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges;
+	size_t adj_grp_size_ranges_count;
+	struct delayed_work nh_grp_activity_dw;
+	struct list_head nh_res_grp_list;
 };
 
 struct mlxsw_sp_fib_entry_priv {
@@ -195,20 +199,20 @@ mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
 				     const struct mlxsw_sp_ipip_entry *except);
 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
 					       struct mlxsw_sp_nexthop *nh);
-bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh);
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
 			     u32 *p_adj_size, u32 *p_adj_hash_index);
 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
-bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
 #define mlxsw_sp_nexthop_for_each(nh, router)				\
 	for (nh = mlxsw_sp_nexthop_next(router, NULL); nh;		\
 	     nh = mlxsw_sp_nexthop_next(router, nh))
 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter);
-int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			    struct mlxsw_sp_nexthop *nh);
+int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				struct mlxsw_sp_nexthop *nh, bool force,
+				char *ratr_pl);
 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop *nh);
 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 1892cea05ee7..3398cc01e5ec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -186,6 +186,7 @@ mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
 	/* Create a new port analayzer entry for local_port. */
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
 			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+	mlxsw_reg_mpat_session_id_set(mpat_pl, sparms.session_id);
 	mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable);
 	mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id);
 
@@ -203,6 +204,7 @@ mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
 	int pa_id = span_entry->id;
 
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+	mlxsw_reg_mpat_session_id_set(mpat_pl, span_entry->parms.session_id);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 }
 
@@ -938,7 +940,8 @@ mlxsw_sp_span_entry_find_by_parms(struct mlxsw_sp *mlxsw_sp,
 
 		if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev &&
 		    curr->parms.policer_enable == sparms->policer_enable &&
-		    curr->parms.policer_id == sparms->policer_id)
+		    curr->parms.policer_id == sparms->policer_id &&
+		    curr->parms.session_id == sparms->session_id)
 			return curr;
 	}
 	return NULL;
@@ -1085,6 +1088,7 @@ int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id,
 
 	sparms.policer_id = parms->policer_id;
 	sparms.policer_enable = parms->policer_enable;
+	sparms.session_id = parms->session_id;
 	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
 	if (!span_entry)
 		return -ENOBUFS;
@@ -1227,8 +1231,12 @@ __mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span *span,
 		return -EINVAL;
 	}
 
+	if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAR_RATE_MAX)
+		return -EINVAL;
+
 	mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable,
-			    trigger_entry->parms.span_id);
+			    trigger_entry->parms.span_id,
+			    trigger_entry->parms.probability_rate);
 	return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
 }
 
@@ -1362,8 +1370,11 @@ mlxsw_sp2_span_trigger_global_bind(struct mlxsw_sp_span_trigger_entry *
 		return -EINVAL;
 	}
 
+	if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAGR_RATE_MAX)
+		return -EINVAL;
+
 	mlxsw_reg_mpagr_pack(mpagr_pl, trigger, trigger_entry->parms.span_id,
-			     1);
+			     trigger_entry->parms.probability_rate);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpagr), mpagr_pl);
 }
 
@@ -1561,7 +1572,9 @@ int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp,
 							 trigger,
 							 mlxsw_sp_port);
 	if (trigger_entry) {
-		if (trigger_entry->parms.span_id != parms->span_id)
+		if (trigger_entry->parms.span_id != parms->span_id ||
+		    trigger_entry->parms.probability_rate !=
+		    parms->probability_rate)
 			return -EINVAL;
 		refcount_inc(&trigger_entry->ref_count);
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index aa1cd409c0e2..efaefd1ae863 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -13,6 +13,19 @@
 struct mlxsw_sp;
 struct mlxsw_sp_port;
 
+/* SPAN session identifiers that correspond to MLXSW_TRAP_ID_MIRROR_SESSION<i>
+ * trap identifiers. The session identifier is an attribute of the SPAN agent,
+ * which determines the trap identifier of packets that are mirrored to the
+ * CPU. Packets that are trapped to the CPU for the same logical reason (e.g.,
+ * buffer drops) should use the same session identifier.
+ */
+enum mlxsw_sp_span_session_id {
+	MLXSW_SP_SPAN_SESSION_ID_BUFFER,
+	MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
+
+	__MLXSW_SP_SPAN_SESSION_ID_MAX = 8,
+};
+
 struct mlxsw_sp_span_parms {
 	struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
 	unsigned int ttl;
@@ -23,6 +36,7 @@ struct mlxsw_sp_span_parms {
 	u16 vid;
 	u16 policer_id;
 	bool policer_enable;
+	enum mlxsw_sp_span_session_id session_id;
 };
 
 enum mlxsw_sp_span_trigger {
@@ -35,12 +49,14 @@ enum mlxsw_sp_span_trigger {
 
 struct mlxsw_sp_span_trigger_parms {
 	int span_id;
+	u32 probability_rate;
 };
 
 struct mlxsw_sp_span_agent_parms {
 	const struct net_device *to_dev;
 	u16 policer_id;
 	bool policer_enable;
+	enum mlxsw_sp_span_session_id session_id;
 };
 
 struct mlxsw_sp_span_entry_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 23b7e8d6386b..c1f05c17557d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -98,6 +98,10 @@ struct mlxsw_sp_bridge_ops {
 		       const struct mlxsw_sp_fid *fid);
 };
 
+struct mlxsw_sp_switchdev_ops {
+	void (*init)(struct mlxsw_sp *mlxsw_sp);
+};
+
 static int
 mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_bridge_port *bridge_port,
@@ -2296,7 +2300,7 @@ mlxsw_sp_bridge_8021ad_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
 						     vid, ETH_P_8021AD, extack);
 }
 
-static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = {
+static const struct mlxsw_sp_bridge_ops mlxsw_sp1_bridge_8021ad_ops = {
 	.port_join	= mlxsw_sp_bridge_8021ad_port_join,
 	.port_leave	= mlxsw_sp_bridge_8021ad_port_leave,
 	.vxlan_join	= mlxsw_sp_bridge_8021ad_vxlan_join,
@@ -2305,6 +2309,53 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = {
 	.fid_vid	= mlxsw_sp_bridge_8021q_fid_vid,
 };
 
+static int
+mlxsw_sp2_bridge_8021ad_port_join(struct mlxsw_sp_bridge_device *bridge_device,
+				  struct mlxsw_sp_bridge_port *bridge_port,
+				  struct mlxsw_sp_port *mlxsw_sp_port,
+				  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	/* The EtherType of decapsulated packets is determined at the egress
+	 * port to allow 802.1d and 802.1ad bridges with VXLAN devices to
+	 * co-exist.
+	 */
+	err = mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021AD);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_bridge_8021ad_port_join(bridge_device, bridge_port,
+					       mlxsw_sp_port, extack);
+	if (err)
+		goto err_bridge_8021ad_port_join;
+
+	return 0;
+
+err_bridge_8021ad_port_join:
+	mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q);
+	return err;
+}
+
+static void
+mlxsw_sp2_bridge_8021ad_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
+				   struct mlxsw_sp_bridge_port *bridge_port,
+				   struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	mlxsw_sp_bridge_8021ad_port_leave(bridge_device, bridge_port,
+					  mlxsw_sp_port);
+	mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q);
+}
+
+static const struct mlxsw_sp_bridge_ops mlxsw_sp2_bridge_8021ad_ops = {
+	.port_join	= mlxsw_sp2_bridge_8021ad_port_join,
+	.port_leave	= mlxsw_sp2_bridge_8021ad_port_leave,
+	.vxlan_join	= mlxsw_sp_bridge_8021ad_vxlan_join,
+	.fid_get	= mlxsw_sp_bridge_8021q_fid_get,
+	.fid_lookup	= mlxsw_sp_bridge_8021q_fid_lookup,
+	.fid_vid	= mlxsw_sp_bridge_8021q_fid_vid,
+};
+
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct net_device *brport_dev,
 			      struct net_device *br_dev,
@@ -3535,6 +3586,24 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
 }
 
+static void mlxsw_sp1_switchdev_init(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp1_bridge_8021ad_ops;
+}
+
+const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = {
+	.init	= mlxsw_sp1_switchdev_init,
+};
+
+static void mlxsw_sp2_switchdev_init(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp2_bridge_8021ad_ops;
+}
+
+const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops = {
+	.init	= mlxsw_sp2_switchdev_init,
+};
+
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge;
@@ -3549,7 +3618,8 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 
 	bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops;
 	bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
-	bridge->bridge_8021ad_ops = &mlxsw_sp_bridge_8021ad_ops;
+
+	mlxsw_sp->switchdev_ops->init(mlxsw_sp);
 
 	return mlxsw_sp_fdb_init(mlxsw_sp);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 4ef12e3e021a..26d01adbedad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -49,8 +49,14 @@ enum {
 #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
 
 enum {
+	/* Packet was mirrored from ingress. */
+	MLXSW_SP_MIRROR_REASON_INGRESS = 1,
+	/* Packet was mirrored from policy engine. */
+	MLXSW_SP_MIRROR_REASON_POLICY_ENGINE = 2,
 	/* Packet was early dropped. */
 	MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9,
+	/* Packet was mirrored from egress. */
+	MLXSW_SP_MIRROR_REASON_EGRESS = 14,
 };
 
 static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
@@ -106,7 +112,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
 static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port,
 					  void *trap_ctx)
 {
-	u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index;
+	u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index;
 	const struct flow_action_cookie *fa_cookie;
 	struct devlink_port *in_devlink_port;
 	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -202,21 +208,175 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port);
 }
 
+static struct mlxsw_sp_port *
+mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp,
+			    const struct mlxsw_rx_md_info *rx_md_info)
+{
+	u8 local_port;
+
+	if (!rx_md_info->tx_port_valid)
+		return NULL;
+
+	if (rx_md_info->tx_port_is_lag)
+		local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+							rx_md_info->tx_lag_id,
+							rx_md_info->tx_lag_port_index);
+	else
+		local_port = rx_md_info->tx_sys_port;
+
+	if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core))
+		return NULL;
+
+	return mlxsw_sp->ports[local_port];
+}
+
+/* The latency units are determined according to MOGCR.mirror_latency_units. It
+ * defaults to 64 nanoseconds.
+ */
+#define MLXSW_SP_MIRROR_LATENCY_SHIFT	6
+
+static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp,
+				     struct psample_metadata *md,
+				     struct sk_buff *skb, int in_ifindex,
+				     bool truncate, u32 trunc_size)
+{
+	struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	md->trunc_size = truncate ? trunc_size : skb->len;
+	md->in_ifindex = in_ifindex;
+	mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+	md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ?
+			  mlxsw_sp_port->dev->ifindex : 0;
+	md->out_tc_valid = rx_md_info->tx_tc_valid;
+	md->out_tc = rx_md_info->tx_tc;
+	md->out_tc_occ_valid = rx_md_info->tx_congestion_valid;
+	md->out_tc_occ = rx_md_info->tx_congestion;
+	md->latency_valid = rx_md_info->latency_valid;
+	md->latency = rx_md_info->latency;
+	md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT;
+}
+
 static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 					void *trap_ctx)
 {
 	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params *params;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct psample_metadata md = {};
+	int err;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	trigger.local_port = local_port;
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+	consume_skb(skb);
+}
+
+static void mlxsw_sp_rx_sample_tx_listener(struct sk_buff *skb, u8 local_port,
+					   void *trap_ctx)
+{
+	struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_port *mlxsw_sp_port, *mlxsw_sp_port_tx;
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params *params;
+	struct psample_metadata md = {};
+	int err;
+
+	/* Locally generated packets are not reported from the policy engine
+	 * trigger, so do not report them from the egress trigger as well.
+	 */
+	if (local_port == MLXSW_PORT_CPU_PORT)
+		goto out;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	/* Packet was sampled from Tx, so we need to retrieve the sample
+	 * parameters based on the Tx port and not the Rx port.
+	 */
+	mlxsw_sp_port_tx = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+	if (!mlxsw_sp_port_tx)
+		goto out;
+
+	trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port_tx->local_port;
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+	consume_skb(skb);
+}
+
+static void mlxsw_sp_rx_sample_acl_listener(struct sk_buff *skb, u8 local_port,
+					    void *trap_ctx)
+{
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_sample_params *params;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct psample_metadata md = {};
 	int err;
 
 	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
 	if (err)
 		return;
 
-	/* The sample handler expects skb->data to point to the start of the
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
 	 * Ethernet header.
 	 */
 	skb_push(skb, ETH_HLEN);
-	mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port);
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+	consume_skb(skb);
 }
 
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
@@ -464,11 +624,6 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.priority = 2,
 	},
 	{
-		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
-		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
-		.priority = 0,
-	},
-	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
 		.priority = 4,
@@ -993,14 +1148,6 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
-					      MIRROR),
-		.listeners_arr = {
-			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
-				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
-		},
-	},
-	{
 		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP),
 		.listeners_arr = {
 			MLXSW_SP_RXL_NO_MARK(ACL0, FLOW_LOGGING, TRAP_TO_CPU,
@@ -1709,10 +1856,23 @@ int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id,
 
 static const struct mlxsw_sp_trap_group_item
 mlxsw_sp1_trap_group_items_arr[] = {
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+		.priority = 0,
+	},
 };
 
 static const struct mlxsw_sp_trap_item
 mlxsw_sp1_trap_items_arr[] = {
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
+				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+		},
+	},
 };
 
 static int
@@ -1749,6 +1909,12 @@ mlxsw_sp2_trap_group_items_arr[] = {
 		.priority = 0,
 		.fixed_policer = true,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+		.priority = 0,
+		.fixed_policer = true,
+	},
 };
 
 static const struct mlxsw_sp_trap_item
@@ -1760,6 +1926,21 @@ mlxsw_sp2_trap_items_arr[] = {
 		},
 		.is_source = true,
 	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_INGRESS),
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_EGRESS),
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_acl_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_POLICY_ENGINE),
+		},
+	},
 };
 
 static int
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 2c0dcd7acf3f..a66a179236fd 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -222,7 +222,6 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv)
 	unsigned long timeout = jiffies + msecs_to_jiffies(2000);
 	u16 phstat1;
 	u16 estat;
-	int ret = 0;
 
 	phstat1 = encx24j600_read_phy(priv, PHSTAT1);
 	while ((phstat1 & ANDONE) == 0) {
@@ -258,7 +257,7 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv)
 		encx24j600_write_reg(priv, MACLCON, 0x370f);
 	}
 
-	return ret;
+	return 0;
 }
 
 /* Access the PHY to determine link status */
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index 05cb040c2677..2d3157e4d081 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -11,7 +11,7 @@ config NET_VENDOR_MICROSEMI
 
 if NET_VENDOR_MICROSEMI
 
-# Users should depend on NET_SWITCHDEV, HAS_IOMEM
+# Users should depend on NET_SWITCHDEV, HAS_IOMEM, BRIDGE
 config MSCC_OCELOT_SWITCH_LIB
 	select NET_DEVLINK
 	select REGMAP_MMIO
@@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB
 
 config MSCC_OCELOT_SWITCH
 	tristate "Ocelot switch driver"
+	depends on BRIDGE || BRIDGE=n
 	depends on NET_SWITCHDEV
 	depends on HAS_IOMEM
 	depends on OF_NET
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 46e5c9136bac..1a36b416fd9b 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -687,7 +687,7 @@ static int ocelot_xtr_poll_xfh(struct ocelot *ocelot, int grp, u32 *xfh)
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 {
 	struct skb_shared_hwtstamps *shhwtstamps;
-	u64 tod_in_ns, full_ts_in_ns, cpuq;
+	u64 tod_in_ns, full_ts_in_ns;
 	u64 timestamp, src_port, len;
 	u32 xfh[OCELOT_TAG_LEN / 4];
 	struct net_device *dev;
@@ -704,7 +704,6 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 	ocelot_xfh_get_src_port(xfh, &src_port);
 	ocelot_xfh_get_len(xfh, &len);
 	ocelot_xfh_get_rew_val(xfh, &timestamp);
-	ocelot_xfh_get_cpuq(xfh, &cpuq);
 
 	if (WARN_ON(src_port >= ocelot->num_phys_ports))
 		return -EINVAL;
@@ -767,17 +766,11 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 	/* Everything we see on an interface that is in the HW bridge
 	 * has already been forwarded.
 	 */
-	if (ocelot->bridge_mask & BIT(src_port))
+	if (ocelot->ports[src_port]->bridge)
 		skb->offload_fwd_mark = 1;
 
 	skb->protocol = eth_type_trans(skb, dev);
 
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	if (skb->protocol == cpu_to_be16(ETH_P_MRP) &&
-	    cpuq & BIT(OCELOT_MRP_CPUQ))
-		skb->offload_fwd_mark = 0;
-#endif
-
 	*nskb = skb;
 
 	return 0;
@@ -1190,6 +1183,26 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
 	return mask;
 }
 
+static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot,
+				      struct net_device *bridge)
+{
+	u32 mask = 0;
+	int port;
+
+	for (port = 0; port < ocelot->num_phys_ports; port++) {
+		struct ocelot_port *ocelot_port = ocelot->ports[port];
+
+		if (!ocelot_port)
+			continue;
+
+		if (ocelot_port->stp_state == BR_STATE_FORWARDING &&
+		    ocelot_port->bridge == bridge)
+			mask |= BIT(port);
+	}
+
+	return mask;
+}
+
 static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
 {
 	u32 mask = 0;
@@ -1239,10 +1252,12 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
 			 */
 			mask = GENMASK(ocelot->num_phys_ports - 1, 0);
 			mask &= ~cpu_fwd_mask;
-		} else if (ocelot->bridge_fwd_mask & BIT(port)) {
+		} else if (ocelot_port->bridge) {
+			struct net_device *bridge = ocelot_port->bridge;
 			struct net_device *bond = ocelot_port->bond;
 
-			mask = ocelot->bridge_fwd_mask & ~BIT(port);
+			mask = ocelot_get_bridge_fwd_mask(ocelot, bridge);
+			mask &= ~BIT(port);
 			if (bond) {
 				mask &= ~ocelot_get_bond_mask(ocelot, bond,
 							      false);
@@ -1263,29 +1278,16 @@ EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
 void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	u32 port_cfg;
+	u32 learn_ena = 0;
 
-	if (!(BIT(port) & ocelot->bridge_mask))
-		return;
-
-	port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
+	ocelot_port->stp_state = state;
 
-	switch (state) {
-	case BR_STATE_FORWARDING:
-		ocelot->bridge_fwd_mask |= BIT(port);
-		fallthrough;
-	case BR_STATE_LEARNING:
-		if (ocelot_port->learn_ena)
-			port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;
-		break;
+	if ((state == BR_STATE_LEARNING || state == BR_STATE_FORWARDING) &&
+	    ocelot_port->learn_ena)
+		learn_ena = ANA_PORT_PORT_CFG_LEARN_ENA;
 
-	default:
-		port_cfg &= ~ANA_PORT_PORT_CFG_LEARN_ENA;
-		ocelot->bridge_fwd_mask &= ~BIT(port);
-		break;
-	}
-
-	ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
+	ocelot_rmw_gix(ocelot, learn_ena, ANA_PORT_PORT_CFG_LEARN_ENA,
+		       ANA_PORT_PORT_CFG, port);
 
 	ocelot_apply_bridge_fwd_mask(ocelot);
 }
@@ -1512,43 +1514,28 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
 }
 EXPORT_SYMBOL(ocelot_port_mdb_del);
 
-int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
-			    struct net_device *bridge)
+void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
+			     struct net_device *bridge)
 {
-	if (!ocelot->bridge_mask) {
-		ocelot->hw_bridge_dev = bridge;
-	} else {
-		if (ocelot->hw_bridge_dev != bridge)
-			/* This is adding the port to a second bridge, this is
-			 * unsupported */
-			return -ENODEV;
-	}
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
 
-	ocelot->bridge_mask |= BIT(port);
+	ocelot_port->bridge = bridge;
 
-	return 0;
+	ocelot_apply_bridge_fwd_mask(ocelot);
 }
 EXPORT_SYMBOL(ocelot_port_bridge_join);
 
-int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
-			     struct net_device *bridge)
+void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+			      struct net_device *bridge)
 {
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	struct ocelot_vlan pvid = {0}, native_vlan = {0};
-	int ret;
-
-	ocelot->bridge_mask &= ~BIT(port);
-
-	if (!ocelot->bridge_mask)
-		ocelot->hw_bridge_dev = NULL;
 
-	ret = ocelot_port_vlan_filtering(ocelot, port, false);
-	if (ret)
-		return ret;
+	ocelot_port->bridge = NULL;
 
 	ocelot_port_set_pvid(ocelot, port, pvid);
 	ocelot_port_set_native_vlan(ocelot, port, native_vlan);
-
-	return 0;
+	ocelot_apply_bridge_fwd_mask(ocelot);
 }
 EXPORT_SYMBOL(ocelot_port_bridge_leave);
 
@@ -2051,6 +2038,9 @@ int ocelot_init(struct ocelot *ocelot)
 
 		ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
 	}
+
+	ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_BLACKHOLE);
+
 	/* Allow broadcast and unknown L2 multicast to the CPU. */
 	ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
 		       ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index a41b458b1b3e..8b843d3c9189 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -220,6 +220,11 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
 						   "Last action must be GOTO");
 				return -EOPNOTSUPP;
 			}
+			if (a->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			filter->action.police_ena = true;
 			rate = a->police.rate_bytes_ps;
 			filter->action.pol.rate = div_u64(rate, 1000) * 8;
diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index 683da320bfd8..08b481a93460 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -1,9 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Microsemi Ocelot Switch driver
  *
- * This contains glue logic between the switchdev driver operations and the
- * mscc_ocelot_switch_lib.
- *
  * Copyright (c) 2017, 2019 Microsemi Corporation
  * Copyright 2020-2021 NXP Semiconductors
  */
@@ -15,13 +12,34 @@
 #include "ocelot.h"
 #include "ocelot_vcap.h"
 
-static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port)
+static const u8 mrp_test_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x01 };
+static const u8 mrp_control_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x02 };
+
+static int ocelot_mrp_find_partner_port(struct ocelot *ocelot,
+					struct ocelot_port *p)
+{
+	int i;
+
+	for (i = 0; i < ocelot->num_phys_ports; ++i) {
+		struct ocelot_port *ocelot_port = ocelot->ports[i];
+
+		if (!ocelot_port || p == ocelot_port)
+			continue;
+
+		if (ocelot_port->mrp_ring_id == p->mrp_ring_id)
+			return i;
+	}
+
+	return -1;
+}
+
+static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int id)
 {
 	struct ocelot_vcap_block *block_vcap_is2;
 	struct ocelot_vcap_filter *filter;
 
 	block_vcap_is2 = &ocelot->block[VCAP_IS2];
-	filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, port,
+	filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, id,
 						     false);
 	if (!filter)
 		return 0;
@@ -29,6 +47,87 @@ static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port)
 	return ocelot_vcap_filter_del(ocelot, filter);
 }
 
+static int ocelot_mrp_redirect_add_vcap(struct ocelot *ocelot, int src_port,
+					int dst_port)
+{
+	const u8 mrp_test_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct ocelot_vcap_filter *filter;
+	int err;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
+	filter->prio = 1;
+	filter->id.cookie = src_port;
+	filter->id.tc_offload = false;
+	filter->block_id = VCAP_IS2;
+	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	filter->ingress_port_mask = BIT(src_port);
+	ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac);
+	ether_addr_copy(filter->key.etype.dmac.mask, mrp_test_mask);
+	filter->action.mask_mode = OCELOT_MASK_MODE_REDIRECT;
+	filter->action.port_mask = BIT(dst_port);
+
+	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	if (err)
+		kfree(filter);
+
+	return err;
+}
+
+static int ocelot_mrp_copy_add_vcap(struct ocelot *ocelot, int port,
+				    int prio, unsigned long cookie)
+{
+	const u8 mrp_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+	struct ocelot_vcap_filter *filter;
+	int err;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
+	filter->prio = prio;
+	filter->id.cookie = cookie;
+	filter->id.tc_offload = false;
+	filter->block_id = VCAP_IS2;
+	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	filter->ingress_port_mask = BIT(port);
+	/* Here is possible to use control or test dmac because the mask
+	 * doesn't cover the LSB
+	 */
+	ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac);
+	ether_addr_copy(filter->key.etype.dmac.mask, mrp_mask);
+	filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
+	filter->action.port_mask = 0x0;
+	filter->action.cpu_copy_ena = true;
+	filter->action.cpu_qu_num = OCELOT_MRP_CPUQ;
+
+	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	if (err)
+		kfree(filter);
+
+	return err;
+}
+
+static void ocelot_mrp_save_mac(struct ocelot *ocelot,
+				struct ocelot_port *port)
+{
+	ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac,
+			  port->pvid_vlan.vid, ENTRYTYPE_LOCKED);
+	ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac,
+			  port->pvid_vlan.vid, ENTRYTYPE_LOCKED);
+}
+
+static void ocelot_mrp_del_mac(struct ocelot *ocelot,
+			       struct ocelot_port *port)
+{
+	ocelot_mact_forget(ocelot, mrp_test_dmac, port->pvid_vlan.vid);
+	ocelot_mact_forget(ocelot, mrp_control_dmac, port->pvid_vlan.vid);
+}
+
 int ocelot_mrp_add(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp)
 {
@@ -45,18 +144,7 @@ int ocelot_mrp_add(struct ocelot *ocelot, int port,
 	if (mrp->p_port != dev && mrp->s_port != dev)
 		return 0;
 
-	if (ocelot->mrp_ring_id != 0 &&
-	    ocelot->mrp_s_port &&
-	    ocelot->mrp_p_port)
-		return -EINVAL;
-
-	if (mrp->p_port == dev)
-		ocelot->mrp_p_port = dev;
-
-	if (mrp->s_port == dev)
-		ocelot->mrp_s_port = dev;
-
-	ocelot->mrp_ring_id = mrp->ring_id;
+	ocelot_port->mrp_ring_id = mrp->ring_id;
 
 	return 0;
 }
@@ -66,33 +154,14 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	if (ocelot->mrp_ring_id == 0 &&
-	    !ocelot->mrp_s_port &&
-	    !ocelot->mrp_p_port)
-		return -EINVAL;
-
-	if (ocelot_mrp_del_vcap(ocelot, priv->chip_port))
-		return -EINVAL;
-
-	if (ocelot->mrp_p_port == dev)
-		ocelot->mrp_p_port = NULL;
-
-	if (ocelot->mrp_s_port == dev)
-		ocelot->mrp_s_port = NULL;
-
-	ocelot->mrp_ring_id = 0;
+	ocelot_port->mrp_ring_id = 0;
 
 	return 0;
 }
@@ -102,49 +171,39 @@ int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port,
 			     const struct switchdev_obj_ring_role_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_vcap_filter *filter;
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
+	int dst_port;
 	int err;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_ring_id != mrp->ring_id)
-		return -EINVAL;
-
-	if (!mrp->sw_backup)
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup)
 		return -EOPNOTSUPP;
 
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
-	if (!filter)
-		return -ENOMEM;
+	ocelot_mrp_save_mac(ocelot, ocelot_port);
 
-	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
-	filter->prio = 1;
-	filter->id.cookie = priv->chip_port;
-	filter->id.tc_offload = false;
-	filter->block_id = VCAP_IS2;
-	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
-	filter->ingress_port_mask = BIT(priv->chip_port);
-	*(__be16 *)filter->key.etype.etype.value = htons(ETH_P_MRP);
-	*(__be16 *)filter->key.etype.etype.mask = htons(0xffff);
-	filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
-	filter->action.port_mask = 0x0;
-	filter->action.cpu_copy_ena = true;
-	filter->action.cpu_qu_num = OCELOT_MRP_CPUQ;
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC)
+		return ocelot_mrp_copy_add_vcap(ocelot, port, 1, port);
 
-	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	dst_port = ocelot_mrp_find_partner_port(ocelot, ocelot_port);
+	if (dst_port == -1)
+		return -EINVAL;
+
+	err = ocelot_mrp_redirect_add_vcap(ocelot, port, dst_port);
 	if (err)
-		kfree(filter);
+		return err;
 
-	return err;
+	err = ocelot_mrp_copy_add_vcap(ocelot, port, 2,
+				       port + ocelot->num_phys_ports);
+	if (err) {
+		ocelot_mrp_del_vcap(ocelot, port);
+		return err;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_add_ring_role);
 
@@ -152,24 +211,32 @@ int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port,
 			     const struct switchdev_obj_ring_role_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
+	int i;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_ring_id != mrp->ring_id)
-		return -EINVAL;
-
-	if (!mrp->sw_backup)
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup)
 		return -EOPNOTSUPP;
 
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	return ocelot_mrp_del_vcap(ocelot, priv->chip_port);
+	ocelot_mrp_del_vcap(ocelot, port);
+	ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports);
+
+	for (i = 0; i < ocelot->num_phys_ports; ++i) {
+		ocelot_port = ocelot->ports[i];
+
+		if (!ocelot_port)
+			continue;
+
+		if (ocelot_port->mrp_ring_id != 0)
+			goto out;
+	}
+
+	ocelot_mrp_del_mac(ocelot, ocelot->ports[port]);
+out:
+	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_del_ring_role);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 12cb6867a2d0..36f32a4d9b0f 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -251,6 +251,12 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv,
 			return -EEXIST;
 		}
 
+		if (action->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "QoS offload not support packets per second");
+			return -EOPNOTSUPP;
+		}
+
 		pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8;
 		pol.burst = action->police.burst;
 
@@ -1111,77 +1117,213 @@ static int ocelot_port_obj_del(struct net_device *dev,
 	return ret;
 }
 
-static int ocelot_netdevice_bridge_join(struct ocelot *ocelot, int port,
-					struct net_device *bridge)
+static void ocelot_inherit_brport_flags(struct ocelot *ocelot, int port,
+					struct net_device *brport_dev)
+{
+	struct switchdev_brport_flags flags = {0};
+	int flag;
+
+	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+
+	for_each_set_bit(flag, &flags.mask, 32)
+		if (br_port_flag_is_set(brport_dev, BIT(flag)))
+			flags.val |= BIT(flag);
+
+	ocelot_port_bridge_flags(ocelot, port, flags);
+}
+
+static void ocelot_clear_brport_flags(struct ocelot *ocelot, int port)
 {
 	struct switchdev_brport_flags flags;
-	int err;
 
 	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	flags.val = flags.mask;
+	flags.val = flags.mask & ~BR_LEARNING;
+
+	ocelot_port_bridge_flags(ocelot, port, flags);
+}
+
+static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
+				 struct net_device *brport_dev,
+				 struct net_device *bridge_dev,
+				 struct netlink_ext_ack *extack)
+{
+	clock_t ageing_time;
+	u8 stp_state;
+	int err;
 
-	err = ocelot_port_bridge_join(ocelot, port, bridge);
+	ocelot_inherit_brport_flags(ocelot, port, brport_dev);
+
+	stp_state = br_port_get_stp_state(brport_dev);
+	ocelot_bridge_stp_state_set(ocelot, port, stp_state);
+
+	err = ocelot_port_vlan_filtering(ocelot, port,
+					 br_vlan_enabled(bridge_dev));
 	if (err)
 		return err;
 
-	ocelot_port_bridge_flags(ocelot, port, flags);
+	ageing_time = br_get_ageing_time(bridge_dev);
+	ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
+
+	err = br_mdb_replay(bridge_dev, brport_dev,
+			    &ocelot_switchdev_blocking_nb, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_fdb_replay(bridge_dev, brport_dev, &ocelot_switchdev_nb);
+	if (err)
+		return err;
+
+	err = br_vlan_replay(bridge_dev, brport_dev,
+			     &ocelot_switchdev_blocking_nb, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port)
+{
+	int err;
+
+	err = ocelot_port_vlan_filtering(ocelot, port, false);
+	if (err)
+		return err;
+
+	ocelot_clear_brport_flags(ocelot, port);
+
+	ocelot_bridge_stp_state_set(ocelot, port, BR_STATE_FORWARDING);
+
+	return 0;
+}
+
+static int ocelot_netdevice_bridge_join(struct net_device *dev,
+					struct net_device *brport_dev,
+					struct net_device *bridge,
+					struct netlink_ext_ack *extack)
+{
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	int port = priv->chip_port;
+	int err;
+
+	ocelot_port_bridge_join(ocelot, port, bridge);
+
+	err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack);
+	if (err)
+		goto err_switchdev_sync;
 
 	return 0;
+
+err_switchdev_sync:
+	ocelot_port_bridge_leave(ocelot, port, bridge);
+	return err;
 }
 
-static int ocelot_netdevice_bridge_leave(struct ocelot *ocelot, int port,
+static int ocelot_netdevice_bridge_leave(struct net_device *dev,
+					 struct net_device *brport_dev,
 					 struct net_device *bridge)
 {
-	struct switchdev_brport_flags flags;
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	int port = priv->chip_port;
 	int err;
 
-	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	flags.val = flags.mask & ~BR_LEARNING;
+	err = ocelot_switchdev_unsync(ocelot, port);
+	if (err)
+		return err;
 
-	err = ocelot_port_bridge_leave(ocelot, port, bridge);
+	ocelot_port_bridge_leave(ocelot, port, bridge);
 
-	ocelot_port_bridge_flags(ocelot, port, flags);
+	return 0;
+}
 
+static int ocelot_netdevice_lag_join(struct net_device *dev,
+				     struct net_device *bond,
+				     struct netdev_lag_upper_info *info,
+				     struct netlink_ext_ack *extack)
+{
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct net_device *bridge_dev;
+	int port = priv->chip_port;
+	int err;
+
+	err = ocelot_port_lag_join(ocelot, port, bond, info);
+	if (err == -EOPNOTSUPP) {
+		NL_SET_ERR_MSG_MOD(extack, "Offloading not supported");
+		return 0;
+	}
+
+	bridge_dev = netdev_master_upper_dev_get(bond);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	err = ocelot_netdevice_bridge_join(dev, bond, bridge_dev, extack);
+	if (err)
+		goto err_bridge_join;
+
+	return 0;
+
+err_bridge_join:
+	ocelot_port_lag_leave(ocelot, port, bond);
 	return err;
 }
 
-static int ocelot_netdevice_changeupper(struct net_device *dev,
-					struct netdev_notifier_changeupper_info *info)
+static int ocelot_netdevice_lag_leave(struct net_device *dev,
+				      struct net_device *bond)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct net_device *bridge_dev;
 	int port = priv->chip_port;
+
+	ocelot_port_lag_leave(ocelot, port, bond);
+
+	bridge_dev = netdev_master_upper_dev_get(bond);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	return ocelot_netdevice_bridge_leave(dev, bond, bridge_dev);
+}
+
+static int ocelot_netdevice_changeupper(struct net_device *dev,
+					struct netdev_notifier_changeupper_info *info)
+{
+	struct netlink_ext_ack *extack;
 	int err = 0;
 
+	extack = netdev_notifier_info_to_extack(&info->info);
+
 	if (netif_is_bridge_master(info->upper_dev)) {
-		if (info->linking) {
-			err = ocelot_netdevice_bridge_join(ocelot, port,
-							   info->upper_dev);
-		} else {
-			err = ocelot_netdevice_bridge_leave(ocelot, port,
+		if (info->linking)
+			err = ocelot_netdevice_bridge_join(dev, dev,
+							   info->upper_dev,
+							   extack);
+		else
+			err = ocelot_netdevice_bridge_leave(dev, dev,
 							    info->upper_dev);
-		}
 	}
 	if (netif_is_lag_master(info->upper_dev)) {
-		if (info->linking) {
-			err = ocelot_port_lag_join(ocelot, port,
-						   info->upper_dev,
-						   info->upper_info);
-			if (err == -EOPNOTSUPP) {
-				NL_SET_ERR_MSG_MOD(info->info.extack,
-						   "Offloading not supported");
-				err = 0;
-			}
-		} else {
-			ocelot_port_lag_leave(ocelot, port,
-					      info->upper_dev);
-		}
+		if (info->linking)
+			err = ocelot_netdevice_lag_join(dev, info->upper_dev,
+							info->upper_info, extack);
+		else
+			ocelot_netdevice_lag_leave(dev, info->upper_dev);
 	}
 
 	return notifier_from_errno(err);
 }
 
+/* Treat CHANGEUPPER events on an offloaded LAG as individual CHANGEUPPER
+ * events for the lower physical ports of the LAG.
+ * If the LAG upper isn't offloaded, ignore its CHANGEUPPER events.
+ * In case the LAG joined a bridge, notify that we are offloading it and can do
+ * forwarding in hardware towards it.
+ */
 static int
 ocelot_netdevice_lag_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
@@ -1191,6 +1333,12 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,
 	int err = NOTIFY_DONE;
 
 	netdev_for_each_lower_dev(dev, lower, iter) {
+		struct ocelot_port_private *priv = netdev_priv(lower);
+		struct ocelot_port *ocelot_port = &priv->port;
+
+		if (ocelot_port->bond != dev)
+			return NOTIFY_OK;
+
 		err = ocelot_netdevice_changeupper(lower, info);
 		if (err)
 			return notifier_from_errno(err);
diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c
index 37a232911395..7945393a0655 100644
--- a/drivers/net/ethernet/mscc/ocelot_vcap.c
+++ b/drivers/net/ethernet/mscc/ocelot_vcap.c
@@ -761,6 +761,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix,
 			vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_ETYPE,
 					   etype.value, etype.mask);
 		}
+		break;
 	}
 	default:
 		break;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 8f2f091bce89..9cfcd5500462 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6657,7 +6657,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
 
 /**
  * s2io_set_link - Set the LInk status
- * @work: work struct containing a pointer to device private structue
+ * @work: work struct containing a pointer to device private structure
  * Description: Sets the link status for the adapter
  */
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index bdbf0726145e..605a1617b195 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -419,8 +419,8 @@ nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port,
 		return data;
 	alink = repr->app_priv;
 	for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) {
-		data = nfp_pr_et(data, "q%u_no_wait", i);
-		data = nfp_pr_et(data, "q%u_delayed", i);
+		ethtool_sprintf(&data, "q%u_no_wait", i);
+		ethtool_sprintf(&data, "q%u_delayed", i);
 	}
 	return data;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
index d4ce8f9ef3cc..88bea6ad59bc 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -104,6 +104,11 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
+	if (action->police.rate_pkt_ps) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not support packets per second");
+		return -EOPNOTSUPP;
+	}
+
 	rate = action->police.rate_bytes_ps;
 	burst = action->police.burst;
 	netdev_port_id = nfp_repr_get_port_id(netdev);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 9c9ae33d84ce..1b482446536d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -429,17 +429,6 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
-__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	vsnprintf(data, ETH_GSTRING_LEN, fmt, args);
-	va_end(args);
-
-	return data + ETH_GSTRING_LEN;
-}
-
 static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
@@ -454,29 +443,29 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 	int i;
 
 	for (i = 0; i < nn->max_r_vecs; i++) {
-		data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
-		data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
-		data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
+		ethtool_sprintf(&data, "rvec_%u_rx_pkts", i);
+		ethtool_sprintf(&data, "rvec_%u_tx_pkts", i);
+		ethtool_sprintf(&data, "rvec_%u_tx_busy", i);
 	}
 
-	data = nfp_pr_et(data, "hw_rx_csum_ok");
-	data = nfp_pr_et(data, "hw_rx_csum_inner_ok");
-	data = nfp_pr_et(data, "hw_rx_csum_complete");
-	data = nfp_pr_et(data, "hw_rx_csum_err");
-	data = nfp_pr_et(data, "rx_replace_buf_alloc_fail");
-	data = nfp_pr_et(data, "rx_tls_decrypted_packets");
-	data = nfp_pr_et(data, "hw_tx_csum");
-	data = nfp_pr_et(data, "hw_tx_inner_csum");
-	data = nfp_pr_et(data, "tx_gather");
-	data = nfp_pr_et(data, "tx_lso");
-	data = nfp_pr_et(data, "tx_tls_encrypted_packets");
-	data = nfp_pr_et(data, "tx_tls_ooo");
-	data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
-
-	data = nfp_pr_et(data, "hw_tls_no_space");
-	data = nfp_pr_et(data, "rx_tls_resync_req_ok");
-	data = nfp_pr_et(data, "rx_tls_resync_req_ign");
-	data = nfp_pr_et(data, "rx_tls_resync_sent");
+	ethtool_sprintf(&data, "hw_rx_csum_ok");
+	ethtool_sprintf(&data, "hw_rx_csum_inner_ok");
+	ethtool_sprintf(&data, "hw_rx_csum_complete");
+	ethtool_sprintf(&data, "hw_rx_csum_err");
+	ethtool_sprintf(&data, "rx_replace_buf_alloc_fail");
+	ethtool_sprintf(&data, "rx_tls_decrypted_packets");
+	ethtool_sprintf(&data, "hw_tx_csum");
+	ethtool_sprintf(&data, "hw_tx_inner_csum");
+	ethtool_sprintf(&data, "tx_gather");
+	ethtool_sprintf(&data, "tx_lso");
+	ethtool_sprintf(&data, "tx_tls_encrypted_packets");
+	ethtool_sprintf(&data, "tx_tls_ooo");
+	ethtool_sprintf(&data, "tx_tls_drop_no_sync_data");
+
+	ethtool_sprintf(&data, "hw_tls_no_space");
+	ethtool_sprintf(&data, "rx_tls_resync_req_ok");
+	ethtool_sprintf(&data, "rx_tls_resync_req_ign");
+	ethtool_sprintf(&data, "rx_tls_resync_sent");
 
 	return data;
 }
@@ -550,19 +539,19 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr)
 	swap_off = repr * NN_ET_SWITCH_STATS_LEN;
 
 	for (i = 0; i < NN_ET_SWITCH_STATS_LEN; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i + swap_off].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i + swap_off].name);
 
 	for (i = NN_ET_SWITCH_STATS_LEN; i < NN_ET_SWITCH_STATS_LEN * 2; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i - swap_off].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i - swap_off].name);
 
 	for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i].name);
 
 	for (i = 0; i < num_vecs; i++) {
-		data = nfp_pr_et(data, "rxq_%u_pkts", i);
-		data = nfp_pr_et(data, "rxq_%u_bytes", i);
-		data = nfp_pr_et(data, "txq_%u_pkts", i);
-		data = nfp_pr_et(data, "txq_%u_bytes", i);
+		ethtool_sprintf(&data, "rxq_%u_pkts", i);
+		ethtool_sprintf(&data, "rxq_%u_bytes", i);
+		ethtool_sprintf(&data, "txq_%u_pkts", i);
+		ethtool_sprintf(&data, "txq_%u_bytes", i);
 	}
 
 	return data;
@@ -610,15 +599,15 @@ static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data)
 			memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN);
 			data += ETH_GSTRING_LEN;
 		} else {
-			data = nfp_pr_et(data, "dev_unknown_stat%u", id);
+			ethtool_sprintf(&data, "dev_unknown_stat%u", id);
 		}
 	}
 
 	for (i = 0; i < nn->max_r_vecs; i++) {
-		data = nfp_pr_et(data, "rxq_%u_pkts", i);
-		data = nfp_pr_et(data, "rxq_%u_bytes", i);
-		data = nfp_pr_et(data, "txq_%u_pkts", i);
-		data = nfp_pr_et(data, "txq_%u_bytes", i);
+		ethtool_sprintf(&data, "rxq_%u_pkts", i);
+		ethtool_sprintf(&data, "rxq_%u_bytes", i);
+		ethtool_sprintf(&data, "txq_%u_pkts", i);
+		ethtool_sprintf(&data, "txq_%u_bytes", i);
 	}
 
 	return data;
@@ -666,7 +655,7 @@ static u8 *nfp_mac_get_stats_strings(struct net_device *netdev, u8 *data)
 		return data;
 
 	for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++)
-		data = nfp_pr_et(data, "mac.%s", nfp_mac_et_stats[i].name);
+		ethtool_sprintf(&data, "mac.%s", nfp_mac_et_stats[i].name);
 
 	return data;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index d7fd203bb180..ae4da189d955 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -92,8 +92,6 @@ struct nfp_port {
 
 extern const struct ethtool_ops nfp_port_ethtool_ops;
 
-__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...);
-
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 		      void *type_data);
 
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 55cef5b16aa5..a6823c4d355d 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -612,15 +612,6 @@ void pch_gbe_free_tx_resources(struct pch_gbe_adapter *adapter,
 void pch_gbe_free_rx_resources(struct pch_gbe_adapter *adapter,
 			       struct pch_gbe_rx_ring *rx_ring);
 void pch_gbe_update_stats(struct pch_gbe_adapter *adapter);
-u32 pch_ch_control_read(struct pci_dev *pdev);
-void pch_ch_control_write(struct pci_dev *pdev, u32 val);
-u32 pch_ch_event_read(struct pci_dev *pdev);
-void pch_ch_event_write(struct pci_dev *pdev, u32 val);
-u32 pch_src_uuid_lo_read(struct pci_dev *pdev);
-u32 pch_src_uuid_hi_read(struct pci_dev *pdev);
-u64 pch_rx_snap_read(struct pci_dev *pdev);
-u64 pch_tx_snap_read(struct pci_dev *pdev);
-int pch_set_station_address(u8 *addr, struct pci_dev *pdev);
 
 /* pch_gbe_param.c */
 void pch_gbe_check_options(struct pch_gbe_adapter *adapter);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 140cee7c459d..334af49e5add 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_classify.h>
+#include <linux/ptp_pch.h>
 #include <linux/gpio.h>
 
 #define DRV_VERSION     "1.01"
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index b0d8499d373b..e4a5416adc80 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -184,6 +184,10 @@ static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	struct device *dev = ionic->dev;
 	int ret = 0;
 
+	if (ionic->lif &&
+	    test_bit(IONIC_LIF_F_FW_RESET, ionic->lif->state))
+		return -EBUSY;
+
 	if (num_vfs > 0) {
 		ret = pci_enable_sriov(pdev, num_vfs);
 		if (ret) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index fb2b5bf179d7..0532f7cf086d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -14,18 +14,20 @@
 static void ionic_watchdog_cb(struct timer_list *t)
 {
 	struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
+	struct ionic_lif *lif = ionic->lif;
 	int hb;
 
 	mod_timer(&ionic->watchdog_timer,
 		  round_jiffies(jiffies + ionic->watchdog_period));
 
-	if (!ionic->lif)
+	if (!lif)
 		return;
 
 	hb = ionic_heartbeat_check(ionic);
 
-	if (hb >= 0)
-		ionic_link_status_check_request(ionic->lif, CAN_NOT_SLEEP);
+	if (hb >= 0 &&
+	    !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 }
 
 void ionic_init_devinfo(struct ionic *ionic)
@@ -585,9 +587,9 @@ void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
 void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
 		  void *cb_arg)
 {
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_desc_info *desc_info;
 	struct ionic_lif *lif = q->lif;
+	struct device *dev = q->dev;
 
 	desc_info = &q->info[q->head_idx];
 	desc_info->cb = cb;
@@ -629,7 +631,7 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
 
 	/* stop index must be for a descriptor that is not yet completed */
 	if (unlikely(!ionic_q_is_posted(q, stop_index)))
-		dev_err(q->lif->ionic->dev,
+		dev_err(q->dev,
 			"ionic stop is not posted %s stop %u tail %u head %u\n",
 			q->name, stop_index, q->tail_idx, q->head_idx);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 690768ff0143..ca7e55455165 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -170,11 +170,20 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
-struct ionic_page_info {
+#define IONIC_PAGE_SIZE				PAGE_SIZE
+#define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 2)
+#define IONIC_PAGE_GFP_MASK			(GFP_ATOMIC | __GFP_NOWARN |\
+						 __GFP_COMP | __GFP_MEMALLOC)
+
+struct ionic_buf_info {
 	struct page *page;
 	dma_addr_t dma_addr;
+	u32 page_offset;
+	u32 len;
 };
 
+#define IONIC_MAX_FRAGS			(1 + IONIC_TX_MAX_SG_ELEMS_V1)
+
 struct ionic_desc_info {
 	union {
 		void *desc;
@@ -187,8 +196,9 @@ struct ionic_desc_info {
 		struct ionic_txq_sg_desc *txq_sg_desc;
 		struct ionic_rxq_sg_desc *rxq_sgl_desc;
 	};
-	unsigned int npages;
-	struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
+	unsigned int bytes;
+	unsigned int nbufs;
+	struct ionic_buf_info bufs[IONIC_MAX_FRAGS];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
@@ -199,10 +209,12 @@ struct ionic_queue {
 	struct device *dev;
 	struct ionic_lif *lif;
 	struct ionic_desc_info *info;
+	u64 dbval;
 	u16 head_idx;
 	u16 tail_idx;
 	unsigned int index;
 	unsigned int num_descs;
+	unsigned int max_sg_elems;
 	u64 dbell_count;
 	u64 stop;
 	u64 wake;
@@ -211,7 +223,6 @@ struct ionic_queue {
 	unsigned int type;
 	unsigned int hw_index;
 	unsigned int hw_type;
-	u64 dbval;
 	union {
 		void *base;
 		struct ionic_txq_desc *txq;
@@ -229,7 +240,7 @@ struct ionic_queue {
 	unsigned int sg_desc_size;
 	unsigned int pid;
 	char name[IONIC_QUEUE_NAME_MAX_SZ];
-};
+} ____cacheline_aligned_in_smp;
 
 #define IONIC_INTR_INDEX_NOT_ASSIGNED	-1
 #define IONIC_INTR_NAME_MAX_SZ		32
@@ -256,7 +267,7 @@ struct ionic_cq {
 	u64 compl_count;
 	void *base;
 	dma_addr_t base_pa;
-};
+} ____cacheline_aligned_in_smp;
 
 struct ionic;
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 0832bedcb3b4..b1e78b452fad 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -29,11 +29,9 @@ static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf)
 static void ionic_get_stats(struct net_device *netdev,
 			    struct ethtool_stats *stats, u64 *buf)
 {
-	struct ionic_lif *lif;
+	struct ionic_lif *lif = netdev_priv(netdev);
 	u32 i;
 
-	lif = netdev_priv(netdev);
-
 	memset(buf, 0, stats->n_stats * sizeof(*buf));
 	for (i = 0; i < ionic_num_stats_grps; i++)
 		ionic_stats_groups[i].get_values(lif, &buf);
@@ -209,6 +207,14 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseER_Full);
 		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_1000BASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		break;
 	case IONIC_XCVR_PID_UNKNOWN:
 		/* This means there's no module plugged in */
 		break;
@@ -264,12 +270,10 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
 				    const struct ethtool_link_ksettings *ks)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_dev *idev = &lif->ionic->idev;
 	struct ionic *ionic = lif->ionic;
-	struct ionic_dev *idev;
 	int err = 0;
 
-	idev = &lif->ionic->idev;
-
 	/* set autoneg */
 	if (ks->base.autoneg != idev->port_info->config.an_enable) {
 		mutex_lock(&ionic->dev_cmd_lock);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 31ccfcdc2b0a..88210142395d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -320,7 +320,7 @@ struct ionic_lif_identify_comp {
 /**
  * enum ionic_lif_capability - LIF capabilities
  * @IONIC_LIF_CAP_ETH:     LIF supports Ethernet
- * @IONIC_LIF_CAP_RDMA:    LIF support RDMA
+ * @IONIC_LIF_CAP_RDMA:    LIF supports RDMA
  */
 enum ionic_lif_capability {
 	IONIC_LIF_CAP_ETH        = BIT(0),
@@ -404,7 +404,7 @@ union ionic_lif_config {
  *     @max_ucast_filters:  Number of perfect unicast addresses supported
  *     @max_mcast_filters:  Number of perfect multicast addresses supported
  *     @min_frame_size:     Minimum size of frames to be sent
- *     @max_frame_size:     Maximim size of frames to be sent
+ *     @max_frame_size:     Maximum size of frames to be sent
  *     @config:             LIF config struct with features, mtu, mac, q counts
  *
  * @rdma:                RDMA identify structure
@@ -692,7 +692,7 @@ enum ionic_txq_desc_opcode {
  *                      checksums are also updated.
  *
  *                   IONIC_TXQ_DESC_OPCODE_TSO:
- *                      Device preforms TCP segmentation offload
+ *                      Device performs TCP segmentation offload
  *                      (TSO).  @hdr_len is the number of bytes
  *                      to the end of TCP header (the offset to
  *                      the TCP payload).  @mss is the desired
@@ -982,13 +982,13 @@ struct ionic_rxq_comp {
 };
 
 enum ionic_pkt_type {
-	IONIC_PKT_TYPE_NON_IP     = 0x000,
-	IONIC_PKT_TYPE_IPV4       = 0x001,
-	IONIC_PKT_TYPE_IPV4_TCP   = 0x003,
-	IONIC_PKT_TYPE_IPV4_UDP   = 0x005,
-	IONIC_PKT_TYPE_IPV6       = 0x008,
-	IONIC_PKT_TYPE_IPV6_TCP   = 0x018,
-	IONIC_PKT_TYPE_IPV6_UDP   = 0x028,
+	IONIC_PKT_TYPE_NON_IP		= 0x00,
+	IONIC_PKT_TYPE_IPV4		= 0x01,
+	IONIC_PKT_TYPE_IPV4_TCP		= 0x03,
+	IONIC_PKT_TYPE_IPV4_UDP		= 0x05,
+	IONIC_PKT_TYPE_IPV6		= 0x08,
+	IONIC_PKT_TYPE_IPV6_TCP		= 0x18,
+	IONIC_PKT_TYPE_IPV6_UDP		= 0x28,
 	/* below types are only used if encap offloads are enabled on lif */
 	IONIC_PKT_TYPE_ENCAP_NON_IP	= 0x40,
 	IONIC_PKT_TYPE_ENCAP_IPV4	= 0x41,
@@ -1111,6 +1111,8 @@ enum ionic_xcvr_pid {
 	IONIC_XCVR_PID_QSFP_100G_CWDM4  = 69,
 	IONIC_XCVR_PID_QSFP_100G_PSM4   = 70,
 	IONIC_XCVR_PID_SFP_25GBASE_ACC  = 71,
+	IONIC_XCVR_PID_SFP_10GBASE_T    = 72,
+	IONIC_XCVR_PID_SFP_1000BASE_T   = 73,
 };
 
 /**
@@ -1331,7 +1333,7 @@ enum ionic_stats_ctl_cmd {
  * @IONIC_PORT_ATTR_STATE:      Port state attribute
  * @IONIC_PORT_ATTR_SPEED:      Port speed attribute
  * @IONIC_PORT_ATTR_MTU:        Port MTU attribute
- * @IONIC_PORT_ATTR_AUTONEG:    Port autonegotation attribute
+ * @IONIC_PORT_ATTR_AUTONEG:    Port autonegotiation attribute
  * @IONIC_PORT_ATTR_FEC:        Port FEC attribute
  * @IONIC_PORT_ATTR_PAUSE:      Port pause attribute
  * @IONIC_PORT_ATTR_LOOPBACK:   Port loopback attribute
@@ -1951,8 +1953,8 @@ enum ionic_qos_sched_type {
  * @pfc_cos:		Priority-Flow Control class of service
  * @dwrr_weight:	QoS class scheduling weight
  * @strict_rlmt:	Rate limit for strict priority scheduling
- * @rw_dot1q_pcp:	Rewrite dot1q pcp to this value	(valid iff F_RW_DOT1Q_PCP)
- * @rw_ip_dscp:		Rewrite ip dscp to this value	(valid iff F_RW_IP_DSCP)
+ * @rw_dot1q_pcp:	Rewrite dot1q pcp to value (valid iff F_RW_DOT1Q_PCP)
+ * @rw_ip_dscp:		Rewrite ip dscp to value (valid iff F_RW_IP_DSCP)
  * @dot1q_pcp:		Dot1q pcp value
  * @ndscp:		Number of valid dscp values in the ip_dscp field
  * @ip_dscp:		IP dscp values
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 11140915c2da..889d234e2ffa 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -120,23 +120,34 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 	if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state))
 		return;
 
+	/* Don't put carrier back up if we're in a broken state */
+	if (test_bit(IONIC_LIF_F_BROKEN, lif->state)) {
+		clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state);
+		return;
+	}
+
 	link_status = le16_to_cpu(lif->info->status.link_status);
 	link_up = link_status == IONIC_PORT_OPER_STATUS_UP;
 
 	if (link_up) {
-		if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) {
+		int err = 0;
+
+		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
-			ionic_start_queues(lif);
+			err = ionic_start_queues(lif);
+			if (err) {
+				netdev_err(lif->netdev,
+					   "Failed to start queues: %d\n", err);
+				set_bit(IONIC_LIF_F_BROKEN, lif->state);
+				netif_carrier_off(lif->netdev);
+			}
 			mutex_unlock(&lif->queue_lock);
 		}
 
-		if (!netif_carrier_ok(netdev)) {
-			u32 link_speed;
-
+		if (!err && !netif_carrier_ok(netdev)) {
 			ionic_port_identify(lif->ionic);
-			link_speed = le32_to_cpu(lif->info->status.link_speed);
 			netdev_info(netdev, "Link up - %d Gbps\n",
-				    link_speed / 1000);
+				    le32_to_cpu(lif->info->status.link_speed) / 1000);
 			netif_carrier_on(netdev);
 		}
 	} else {
@@ -145,7 +156,7 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 			netif_carrier_off(netdev);
 		}
 
-		if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) {
+		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
 			ionic_stop_queues(lif);
 			mutex_unlock(&lif->queue_lock);
@@ -382,6 +393,8 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
 static void ionic_qcqs_free(struct ionic_lif *lif)
 {
 	struct device *dev = lif->ionic->dev;
+	struct ionic_qcq *adminqcq;
+	unsigned long irqflags;
 
 	if (lif->notifyqcq) {
 		ionic_qcq_free(lif, lif->notifyqcq);
@@ -390,9 +403,14 @@ static void ionic_qcqs_free(struct ionic_lif *lif)
 	}
 
 	if (lif->adminqcq) {
-		ionic_qcq_free(lif, lif->adminqcq);
-		devm_kfree(dev, lif->adminqcq);
+		spin_lock_irqsave(&lif->adminq_lock, irqflags);
+		adminqcq = READ_ONCE(lif->adminqcq);
 		lif->adminqcq = NULL;
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+		if (adminqcq) {
+			ionic_qcq_free(lif, adminqcq);
+			devm_kfree(dev, adminqcq);
+		}
 	}
 
 	if (lif->rxqcqs) {
@@ -495,6 +513,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 		goto err_out;
 	}
 
+	new->q.dev = dev;
 	new->flags = flags;
 
 	new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info),
@@ -506,6 +525,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 	}
 
 	new->q.type = type;
+	new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems;
 
 	err = ionic_q_init(lif, idev, &new->q, index, name, num_descs,
 			   desc_size, sg_desc_size, pid);
@@ -716,10 +736,8 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 	unsigned int intr_index;
 	int err;
 
-	if (qcq->flags & IONIC_QCQ_F_INTR)
-		intr_index = qcq->intr.index;
-	else
-		intr_index = lif->rxqcqs[q->index]->intr.index;
+	intr_index = qcq->intr.index;
+
 	ctx.cmd.q_init.intr_index = cpu_to_le16(intr_index);
 
 	dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid);
@@ -837,7 +855,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
 
 	switch (le16_to_cpu(comp->event.ecode)) {
 	case IONIC_EVENT_LINK_CHANGE:
-		ionic_link_status_check_request(lif, false);
+		ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 		break;
 	case IONIC_EVENT_RESET:
 		work = kzalloc(sizeof(*work), GFP_ATOMIC);
@@ -875,6 +893,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 	struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr;
 	struct ionic_lif *lif = napi_to_cq(napi)->lif;
 	struct ionic_dev *idev = &lif->ionic->idev;
+	unsigned long irqflags;
 	unsigned int flags = 0;
 	int n_work = 0;
 	int a_work = 0;
@@ -884,14 +903,16 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 		n_work = ionic_cq_service(&lif->notifyqcq->cq, budget,
 					  ionic_notifyq_service, NULL, NULL);
 
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
 	if (lif->adminqcq && lif->adminqcq->flags & IONIC_QCQ_F_INITED)
 		a_work = ionic_cq_service(&lif->adminqcq->cq, budget,
 					  ionic_adminq_service, NULL, NULL);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 
 	work_done = max(n_work, a_work);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		flags |= IONIC_INTR_CRED_UNMASK;
-		lif->adminqcq->cq.bound_intr->rearm_count++;
+		intr->rearm_count++;
 	}
 
 	if (work_done || flags) {
@@ -1441,7 +1462,7 @@ static int ionic_start_queues_reconfig(struct ionic_lif *lif)
 	 */
 	err = ionic_txrx_init(lif);
 	mutex_unlock(&lif->queue_lock);
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
 
 	return err;
@@ -1480,7 +1501,8 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 {
 	struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
 
-	netdev_info(lif->netdev, "Tx Timeout recovery\n");
+	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		return;
 
 	/* if we were stopped before this scheduled job was launched,
 	 * don't bother the queues as they are already stopped.
@@ -1496,6 +1518,7 @@ static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 
+	netdev_info(lif->netdev, "Tx Timeout triggered - txq %d\n", txqueue);
 	schedule_work(&lif->tx_timeout_work);
 }
 
@@ -1837,6 +1860,12 @@ static int ionic_start_queues(struct ionic_lif *lif)
 {
 	int err;
 
+	if (test_bit(IONIC_LIF_F_BROKEN, lif->state))
+		return -EIO;
+
+	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		return -EBUSY;
+
 	if (test_and_set_bit(IONIC_LIF_F_UP, lif->state))
 		return 0;
 
@@ -1855,13 +1884,17 @@ static int ionic_open(struct net_device *netdev)
 	struct ionic_lif *lif = netdev_priv(netdev);
 	int err;
 
+	/* If recovering from a broken state, clear the bit and we'll try again */
+	if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
+		netdev_info(netdev, "clearing broken state\n");
+
 	err = ionic_txrx_alloc(lif);
 	if (err)
 		return err;
 
 	err = ionic_txrx_init(lif);
 	if (err)
-		goto err_out;
+		goto err_txrx_free;
 
 	err = netif_set_real_num_tx_queues(netdev, lif->nxqs);
 	if (err)
@@ -1882,7 +1915,7 @@ static int ionic_open(struct net_device *netdev)
 
 err_txrx_deinit:
 	ionic_txrx_deinit(lif);
-err_out:
+err_txrx_free:
 	ionic_txrx_free(lif);
 	return err;
 }
@@ -2202,6 +2235,9 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	swap(a->cq_base,      b->cq_base);
 	swap(a->cq_base_pa,   b->cq_base_pa);
 	swap(a->cq_size,      b->cq_size);
+
+	ionic_debugfs_del_qcq(a);
+	ionic_debugfs_add_qcq(a->q.lif, a);
 }
 
 int ionic_reconfigure_queues(struct ionic_lif *lif,
@@ -2351,7 +2387,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 	swap(lif->nxqs, qparam->nxqs);
 
 err_out_reinit_unlock:
-	/* re-init the queues, but don't loose an error code */
+	/* re-init the queues, but don't lose an error code */
 	if (err)
 		ionic_start_queues_reconfig(lif);
 	else
@@ -2450,7 +2486,6 @@ int ionic_lif_alloc(struct ionic *ionic)
 	lif->index = 0;
 	lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
 	lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
-	lif->tx_budget = IONIC_TX_BUDGET_DEFAULT;
 
 	/* Convert the default coalesce value to actual hw resolution */
 	lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
@@ -2601,7 +2636,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 	}
 
 	clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
 	dev_info(ionic->dev, "FW Up: LIFs restarted\n");
 
@@ -2972,7 +3007,7 @@ int ionic_lif_register(struct ionic_lif *lif)
 		return err;
 	}
 
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	lif->registered = true;
 	ionic_lif_set_netdev_info(lif);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 563dba384a53..be5cc89b2bd9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -139,6 +139,7 @@ enum ionic_lif_state_flags {
 	IONIC_LIF_F_LINK_CHECK_REQUESTED,
 	IONIC_LIF_F_FW_RESET,
 	IONIC_LIF_F_SPLIT_INTR,
+	IONIC_LIF_F_BROKEN,
 	IONIC_LIF_F_TX_DIM_INTR,
 	IONIC_LIF_F_RX_DIM_INTR,
 
@@ -159,16 +160,11 @@ struct ionic_qtype_info {
 
 #define IONIC_LIF_NAME_MAX_SZ		32
 struct ionic_lif {
-	char name[IONIC_LIF_NAME_MAX_SZ];
-	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(state, IONIC_LIF_F_STATE_SIZE);
 	struct ionic *ionic;
-	bool registered;
 	unsigned int index;
 	unsigned int hw_index;
-	unsigned int kern_pid;
-	u64 __iomem *kern_dbpage;
 	struct mutex queue_lock;	/* lock for queue structures */
 	spinlock_t adminq_lock;		/* lock for AdminQ operations */
 	struct ionic_qcq *adminqcq;
@@ -177,20 +173,25 @@ struct ionic_lif {
 	struct ionic_tx_stats *txqstats;
 	struct ionic_qcq **rxqcqs;
 	struct ionic_rx_stats *rxqstats;
+	struct ionic_deferred deferred;
+	struct work_struct tx_timeout_work;
 	u64 last_eid;
+	unsigned int kern_pid;
+	u64 __iomem *kern_dbpage;
 	unsigned int neqs;
 	unsigned int nxqs;
 	unsigned int ntxq_descs;
 	unsigned int nrxq_descs;
 	u32 rx_copybreak;
-	u32 tx_budget;
 	unsigned int rx_mode;
 	u64 hw_features;
+	bool registered;
 	bool mc_overflow;
-	unsigned int nmcast;
 	bool uc_overflow;
 	u16 lif_type;
+	unsigned int nmcast;
 	unsigned int nucast;
+	char name[IONIC_LIF_NAME_MAX_SZ];
 
 	union ionic_lif_identity *identity;
 	struct ionic_lif_info *info;
@@ -205,16 +206,14 @@ struct ionic_lif {
 	u32 rss_ind_tbl_sz;
 
 	struct ionic_rx_filters rx_filters;
-	struct ionic_deferred deferred;
-	unsigned long *dbid_inuse;
-	unsigned int dbid_count;
-	struct dentry *dentry;
 	u32 rx_coalesce_usecs;		/* what the user asked for */
 	u32 rx_coalesce_hw;		/* what the hw is using */
 	u32 tx_coalesce_usecs;		/* what the user asked for */
 	u32 tx_coalesce_hw;		/* what the hw is using */
+	unsigned long *dbid_inuse;
+	unsigned int dbid_count;
 
-	struct work_struct tx_timeout_work;
+	struct dentry *dentry;
 };
 
 struct ionic_queue_params {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index fbc57de6683e..c4b2906a2ae6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -187,10 +187,17 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
 
 static void ionic_adminq_flush(struct ionic_lif *lif)
 {
-	struct ionic_queue *q = &lif->adminqcq->q;
 	struct ionic_desc_info *desc_info;
+	unsigned long irqflags;
+	struct ionic_queue *q;
+
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
+	if (!lif->adminqcq) {
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+		return;
+	}
 
-	spin_lock(&lif->adminq_lock);
+	q = &lif->adminqcq->q;
 
 	while (q->tail_idx != q->head_idx) {
 		desc_info = &q->info[q->tail_idx];
@@ -199,7 +206,7 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
 		desc_info->cb_arg = NULL;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 	}
-	spin_unlock(&lif->adminq_lock);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 }
 
 static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -234,17 +241,15 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 {
 	struct ionic_admin_ctx *ctx = cb_arg;
 	struct ionic_admin_comp *comp;
-	struct device *dev;
 
 	if (!ctx)
 		return;
 
 	comp = cq_info->cq_desc;
-	dev = &q->lif->netdev->dev;
 
 	memcpy(&ctx->comp, comp, sizeof(*comp));
 
-	dev_dbg(dev, "comp admin queue command:\n");
+	dev_dbg(q->dev, "comp admin queue command:\n");
 	dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
 			 &ctx->comp, sizeof(ctx->comp), true);
 
@@ -254,15 +259,18 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 {
 	struct ionic_desc_info *desc_info;
+	unsigned long irqflags;
 	struct ionic_queue *q;
 	int err = 0;
 
-	if (!lif->adminqcq)
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
+	if (!lif->adminqcq) {
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 		return -EIO;
+	}
 
 	q = &lif->adminqcq->q;
 
-	spin_lock(&lif->adminq_lock);
 	if (!ionic_q_has_space(q, 1)) {
 		err = -ENOSPC;
 		goto err_out;
@@ -282,7 +290,7 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 	ionic_q_post(q, true, ionic_adminq_cb, ctx);
 
 err_out:
-	spin_unlock(&lif->adminq_lock);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 6ae75b771a15..308b4ac6c57b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -246,98 +246,73 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 	return total;
 }
 
+static void ionic_sw_stats_get_tx_strings(struct ionic_lif *lif, u8 **buf,
+					  int q_num)
+{
+	int i;
+
+	for (i = 0; i < IONIC_NUM_TX_STATS; i++)
+		ethtool_sprintf(buf, "tx_%d_%s", q_num,
+				ionic_tx_stats_desc[i].name);
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_%s", q_num,
+				ionic_txq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_cq_%s", q_num,
+				ionic_dbg_cq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_intr_%s", q_num,
+				ionic_dbg_intr_stats_desc[i].name);
+	for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++)
+		ethtool_sprintf(buf, "txq_%d_sg_cntr_%d", q_num, i);
+}
+
+static void ionic_sw_stats_get_rx_strings(struct ionic_lif *lif, u8 **buf,
+					  int q_num)
+{
+	int i;
+
+	for (i = 0; i < IONIC_NUM_RX_STATS; i++)
+		ethtool_sprintf(buf, "rx_%d_%s", q_num,
+				ionic_rx_stats_desc[i].name);
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_cq_%s", q_num,
+				ionic_dbg_cq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_intr_%s", q_num,
+				ionic_dbg_intr_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_napi_%s", q_num,
+				ionic_dbg_napi_stats_desc[i].name);
+	for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++)
+		ethtool_sprintf(buf, "rxq_%d_napi_work_done_%d", q_num, i);
+}
+
 static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 {
 	int i, q_num;
 
-	for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
-		snprintf(*buf, ETH_GSTRING_LEN, ionic_lif_stats_desc[i].name);
-		*buf += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < IONIC_NUM_LIF_STATS; i++)
+		ethtool_sprintf(buf, ionic_lif_stats_desc[i].name);
 
-	for (i = 0; i < IONIC_NUM_PORT_STATS; i++) {
-		snprintf(*buf, ETH_GSTRING_LEN,
-			 ionic_port_stats_desc[i].name);
-		*buf += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < IONIC_NUM_PORT_STATS; i++)
+		ethtool_sprintf(buf, ionic_port_stats_desc[i].name);
 
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
-			snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s",
-				 q_num, ionic_tx_stats_desc[i].name);
-			*buf += ETH_GSTRING_LEN;
-		}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_tx_strings(lif, buf, q_num);
 
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_%s",
-					 q_num,
-					 ionic_txq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_cq_%s",
-					 q_num,
-					 ionic_dbg_cq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_intr_%s",
-					 q_num,
-					 ionic_dbg_intr_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_sg_cntr_%d",
-					 q_num, i);
-				*buf += ETH_GSTRING_LEN;
-			}
-		}
-	}
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
-			snprintf(*buf, ETH_GSTRING_LEN,
-				 "rx_%d_%s",
-				 q_num, ionic_rx_stats_desc[i].name);
-			*buf += ETH_GSTRING_LEN;
-		}
-
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_cq_%s",
-					 q_num,
-					 ionic_dbg_cq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_intr_%s",
-					 q_num,
-					 ionic_dbg_intr_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_napi_%s",
-					 q_num,
-					 ionic_dbg_napi_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_napi_work_done_%d",
-					 q_num, i);
-				*buf += ETH_GSTRING_LEN;
-			}
-		}
-	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_rx_strings(lif, buf, q_num);
 }
 
 static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 4087311f7082..5985f7c504a9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -10,12 +10,6 @@
 #include "ionic_lif.h"
 #include "ionic_txrx.h"
 
-static void ionic_rx_clean(struct ionic_queue *q,
-			   struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info,
-			   void *cb_arg);
-
-static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
@@ -40,72 +34,149 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
 	return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
-					  unsigned int len, bool frags)
+static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
+{
+	buf_info->page = NULL;
+	buf_info->page_offset = 0;
+	buf_info->dma_addr = 0;
+}
+
+static int ionic_rx_page_alloc(struct ionic_queue *q,
+			       struct ionic_buf_info *buf_info)
 {
-	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct sk_buff *skb;
+	struct device *dev;
 
-	netdev = lif->netdev;
-	stats = &q->lif->rxqstats[q->index];
+	dev = q->dev;
+	stats = q_to_rx_stats(q);
 
-	if (frags)
-		skb = napi_get_frags(&q_to_qcq(q)->napi);
-	else
-		skb = netdev_alloc_skb_ip_align(netdev, len);
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
+				    netdev->name, q->name);
+		return -EINVAL;
+	}
 
-	if (unlikely(!skb)) {
-		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-				     netdev->name, q->name);
+	buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+	if (unlikely(!buf_info->page)) {
+		net_err_ratelimited("%s: %s page alloc failed\n",
+				    netdev->name, q->name);
 		stats->alloc_err++;
-		return NULL;
+		return -ENOMEM;
 	}
+	buf_info->page_offset = 0;
 
-	return skb;
+	buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
+					  IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
+		__free_pages(buf_info->page, 0);
+		ionic_rx_buf_reset(buf_info);
+		net_err_ratelimited("%s: %s dma map failed\n",
+				    netdev->name, q->name);
+		stats->dma_map_err++;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void ionic_rx_page_free(struct ionic_queue *q,
+			       struct ionic_buf_info *buf_info)
+{
+	struct net_device *netdev = q->lif->netdev;
+	struct device *dev = q->dev;
+
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in free\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	if (!buf_info->page)
+		return;
+
+	dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	__free_pages(buf_info->page, 0);
+	ionic_rx_buf_reset(buf_info);
+}
+
+static bool ionic_rx_buf_recycle(struct ionic_queue *q,
+				 struct ionic_buf_info *buf_info, u32 used)
+{
+	u32 size;
+
+	/* don't re-use pages allocated in low-mem condition */
+	if (page_is_pfmemalloc(buf_info->page))
+		return false;
+
+	/* don't re-use buffers from non-local numa nodes */
+	if (page_to_nid(buf_info->page) != numa_mem_id())
+		return false;
+
+	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
+	buf_info->page_offset += size;
+	if (buf_info->page_offset >= IONIC_PAGE_SIZE)
+		return false;
+
+	get_page(buf_info->page);
+
+	return true;
 }
 
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_desc_info *desc_info,
-				      struct ionic_cq_info *cq_info)
+				      struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct net_device *netdev = q->lif->netdev;
+	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	stats = q_to_rx_stats(q);
+
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	prefetch(page_address(page_info->page) + NET_IP_ALIGN);
+	prefetch(buf_info->page);
 
-	skb = ionic_rx_skb_alloc(q, len, true);
-	if (unlikely(!skb))
+	skb = napi_get_frags(&q_to_qcq(q)->napi);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
 	i = comp->num_sg_elems + 1;
 	do {
-		if (unlikely(!page_info->page)) {
-			struct napi_struct *napi = &q_to_qcq(q)->napi;
-
-			napi->skb = NULL;
+		if (unlikely(!buf_info->page)) {
 			dev_kfree_skb(skb);
 			return NULL;
 		}
 
-		frag_len = min(len, (u16)PAGE_SIZE);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
 		len -= frag_len;
 
-		dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_sync_single_for_cpu(dev,
+					buf_info->dma_addr + buf_info->page_offset,
+					frag_len, DMA_FROM_DEVICE);
+
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				page_info->page, 0, frag_len, PAGE_SIZE);
-		page_info->page = NULL;
-		page_info++;
+				buf_info->page, buf_info->page_offset, frag_len,
+				IONIC_PAGE_SIZE);
+
+		if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
+			dma_unmap_page(dev, buf_info->dma_addr,
+				       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+			ionic_rx_buf_reset(buf_info);
+		}
+
+		buf_info++;
+
 		i--;
 	} while (i > 0);
 
@@ -114,30 +185,37 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 
 static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_desc_info *desc_info,
-					  struct ionic_cq_info *cq_info)
+					  struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct net_device *netdev = q->lif->netdev;
+	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	stats = q_to_rx_stats(q);
+
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	skb = ionic_rx_skb_alloc(q, len, false);
-	if (unlikely(!skb))
+	skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
-	if (unlikely(!page_info->page)) {
+	if (unlikely(!buf_info->page)) {
 		dev_kfree_skb(skb);
 		return NULL;
 	}
 
-	dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
+	dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
 				len, DMA_FROM_DEVICE);
-	skb_copy_to_linear_data(skb, page_address(page_info->page), len);
-	dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
+	skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
+	dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
 				   len, DMA_FROM_DEVICE);
 
 	skb_put(skb, len);
@@ -151,14 +229,13 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
 	struct sk_buff *skb;
 
 	stats = q_to_rx_stats(q);
-	netdev = q->lif->netdev;
 
 	if (comp->status) {
 		stats->dropped++;
@@ -169,9 +246,9 @@ static void ionic_rx_clean(struct ionic_queue *q,
 	stats->bytes += le16_to_cpu(comp->len);
 
 	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
-		skb = ionic_rx_copybreak(q, desc_info, cq_info);
+		skb = ionic_rx_copybreak(q, desc_info, comp);
 	else
-		skb = ionic_rx_frags(q, desc_info, cq_info);
+		skb = ionic_rx_frags(q, desc_info, comp);
 
 	if (unlikely(!skb)) {
 		stats->dropped++;
@@ -227,7 +304,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
 
@@ -253,138 +330,75 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	return true;
 }
 
-static int ionic_rx_page_alloc(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-	stats = q_to_rx_stats(q);
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
-				    netdev->name, q->name);
-		return -EINVAL;
-	}
-
-	page_info->page = dev_alloc_page();
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s page alloc failed\n",
-				    netdev->name, q->name);
-		stats->alloc_err++;
-		return -ENOMEM;
-	}
-
-	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
-					   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
-		put_page(page_info->page);
-		page_info->dma_addr = 0;
-		page_info->page = NULL;
-		net_err_ratelimited("%s: %s dma map failed\n",
-				    netdev->name, q->name);
-		stats->dma_map_err++;
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void ionic_rx_page_free(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s invalid page in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
-
-	put_page(page_info->page);
-	page_info->dma_addr = 0;
-	page_info->page = NULL;
-}
-
 void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
 	struct ionic_rxq_sg_desc *sg_desc;
 	struct ionic_rxq_sg_elem *sg_elem;
+	struct ionic_buf_info *buf_info;
 	struct ionic_rxq_desc *desc;
 	unsigned int remain_len;
-	unsigned int seg_len;
+	unsigned int frag_len;
 	unsigned int nfrags;
 	unsigned int i, j;
 	unsigned int len;
 
 	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
-	nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
+		nfrags = 0;
 		remain_len = len;
 		desc_info = &q->info[q->head_idx];
 		desc = desc_info->desc;
-		sg_desc = desc_info->sg_desc;
-		page_info = &desc_info->pages[0];
+		buf_info = &desc_info->bufs[0];
 
-		if (page_info->page) { /* recycle the buffer */
-			ionic_rxq_post(q, false, ionic_rx_clean, NULL);
-			continue;
-		}
-
-		/* fill main descriptor - pages[0] */
-		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
-					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
-		desc_info->npages = nfrags;
-		if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-			desc->addr = 0;
-			desc->len = 0;
-			return;
+		if (!buf_info->page) { /* alloc a new buffer? */
+			if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+				desc->addr = 0;
+				desc->len = 0;
+				return;
+			}
 		}
-		desc->addr = cpu_to_le64(page_info->dma_addr);
-		seg_len = min_t(unsigned int, PAGE_SIZE, len);
-		desc->len = cpu_to_le16(seg_len);
-		remain_len -= seg_len;
-		page_info++;
 
-		/* fill sg descriptors - pages[1..n] */
-		for (j = 0; j < nfrags - 1; j++) {
-			if (page_info->page) /* recycle the sg buffer */
-				continue;
+		/* fill main descriptor - buf[0] */
+		desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		desc->len = cpu_to_le16(frag_len);
+		remain_len -= frag_len;
+		buf_info++;
+		nfrags++;
 
+		/* fill sg descriptors - buf[1..n] */
+		sg_desc = desc_info->sg_desc;
+		for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) {
 			sg_elem = &sg_desc->elems[j];
-			if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-				sg_elem->addr = 0;
-				sg_elem->len = 0;
-				return;
+			if (!buf_info->page) { /* alloc a new sg buffer? */
+				if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+					sg_elem->addr = 0;
+					sg_elem->len = 0;
+					return;
+				}
 			}
-			sg_elem->addr = cpu_to_le64(page_info->dma_addr);
-			seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
-			sg_elem->len = cpu_to_le16(seg_len);
-			remain_len -= seg_len;
-			page_info++;
+
+			sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+			frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
+			sg_elem->len = cpu_to_le16(frag_len);
+			remain_len -= frag_len;
+			buf_info++;
+			nfrags++;
+		}
+
+		/* clear end sg element as a sentinel */
+		if (j < q->max_sg_elems) {
+			sg_elem = &sg_desc->elems[j];
+			memset(sg_elem, 0, sizeof(*sg_elem));
 		}
 
+		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
+					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
+		desc_info->nbufs = nfrags;
+
 		ionic_rxq_post(q, false, ionic_rx_clean, NULL);
 	}
 
@@ -395,21 +409,24 @@ void ionic_rx_fill(struct ionic_queue *q)
 void ionic_rx_empty(struct ionic_queue *q)
 {
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	unsigned int i, j;
 
 	for (i = 0; i < q->num_descs; i++) {
 		desc_info = &q->info[i];
 		for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
-			page_info = &desc_info->pages[j];
-			if (page_info->page)
-				ionic_rx_page_free(q, page_info);
+			buf_info = &desc_info->bufs[j];
+			if (buf_info->page)
+				ionic_rx_page_free(q, buf_info);
 		}
 
-		desc_info->npages = 0;
+		desc_info->nbufs = 0;
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	}
+
+	q->head_idx = 0;
+	q->tail_idx = 0;
 }
 
 static void ionic_dim_update(struct ionic_qcq *qcq)
@@ -525,7 +542,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
 	idev = &lif->ionic->idev;
 	txcq = &lif->txqcqs[qi]->cq;
 
-	tx_work_done = ionic_cq_service(txcq, lif->tx_budget,
+	tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
 					ionic_tx_service, NULL, NULL);
 
 	rx_work_done = ionic_cq_service(rxcq, budget,
@@ -558,7 +575,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
 				      void *data, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
@@ -576,7 +593,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 				    size_t offset, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
@@ -588,42 +605,72 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 	return dma_addr;
 }
 
+static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
+			    struct ionic_desc_info *desc_info)
+{
+	struct ionic_buf_info *buf_info = desc_info->bufs;
+	struct device *dev = q->dev;
+	dma_addr_t dma_addr;
+	unsigned int nfrags;
+	skb_frag_t *frag;
+	int frag_idx;
+
+	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+	if (dma_mapping_error(dev, dma_addr))
+		return -EIO;
+	buf_info->dma_addr = dma_addr;
+	buf_info->len = skb_headlen(skb);
+	buf_info++;
+
+	frag = skb_shinfo(skb)->frags;
+	nfrags = skb_shinfo(skb)->nr_frags;
+	for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) {
+		dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
+		if (dma_mapping_error(dev, dma_addr))
+			goto dma_fail;
+		buf_info->dma_addr = dma_addr;
+		buf_info->len = skb_frag_size(frag);
+		buf_info++;
+	}
+
+	desc_info->nbufs = 1 + nfrags;
+
+	return 0;
+
+dma_fail:
+	/* unwind the frag mappings and the head mapping */
+	while (frag_idx > 0) {
+		frag_idx--;
+		buf_info--;
+		dma_unmap_page(dev, buf_info->dma_addr,
+			       buf_info->len, DMA_TO_DEVICE);
+	}
+	dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE);
+	return -EIO;
+}
+
 static void ionic_tx_clean(struct ionic_queue *q,
 			   struct ionic_desc_info *desc_info,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
-	struct ionic_txq_sg_elem *elem = sg_desc->elems;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct ionic_txq_desc *desc = desc_info->desc;
-	struct device *dev = q->lif->ionic->dev;
-	u8 opcode, flags, nsge;
+	struct device *dev = q->dev;
 	u16 queue_index;
 	unsigned int i;
-	u64 addr;
-
-	decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
-			    &opcode, &flags, &nsge, &addr);
-
-	/* use unmap_single only if either this is not TSO,
-	 * or this is first descriptor of a TSO
-	 */
-	if (opcode != IONIC_TXQ_DESC_OPCODE_TSO ||
-	    flags & IONIC_TXQ_DESC_FLAG_TSO_SOT)
-		dma_unmap_single(dev, (dma_addr_t)addr,
-				 le16_to_cpu(desc->len), DMA_TO_DEVICE);
-	else
-		dma_unmap_page(dev, (dma_addr_t)addr,
-			       le16_to_cpu(desc->len), DMA_TO_DEVICE);
 
-	for (i = 0; i < nsge; i++, elem++)
-		dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
-			       le16_to_cpu(elem->len), DMA_TO_DEVICE);
+	if (desc_info->nbufs) {
+		dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+				 buf_info->len, DMA_TO_DEVICE);
+		buf_info++;
+		for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+			dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+				       buf_info->len, DMA_TO_DEVICE);
+	}
 
 	if (cb_arg) {
 		struct sk_buff *skb = cb_arg;
-		u32 len = skb->len;
 
 		queue_index = skb_get_queue_mapping(skb);
 		if (unlikely(__netif_subqueue_stopped(q->lif->netdev,
@@ -631,17 +678,21 @@ static void ionic_tx_clean(struct ionic_queue *q,
 			netif_wake_subqueue(q->lif->netdev, queue_index);
 			q->wake++;
 		}
-		dev_kfree_skb_any(skb);
+
+		desc_info->bytes = skb->len;
 		stats->clean++;
-		netdev_tx_completed_queue(q_to_ndq(q), 1, len);
+
+		dev_consume_skb_any(skb);
 	}
 }
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_txq_comp *comp = cq_info->cq_desc;
+	struct ionic_txq_comp *comp = cq_info->txcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
+	int bytes = 0;
+	int pkts = 0;
 	u16 index;
 
 	if (!color_match(comp->color, cq->done_color))
@@ -652,13 +703,21 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	 */
 	do {
 		desc_info = &q->info[q->tail_idx];
+		desc_info->bytes = 0;
 		index = q->tail_idx;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 		ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg);
+		if (desc_info->cb_arg) {
+			pkts++;
+			bytes += desc_info->bytes;
+		}
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	} while (index != le16_to_cpu(comp->comp_index));
 
+	if (pkts && bytes)
+		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
+
 	return true;
 }
 
@@ -677,15 +736,25 @@ void ionic_tx_flush(struct ionic_cq *cq)
 void ionic_tx_empty(struct ionic_queue *q)
 {
 	struct ionic_desc_info *desc_info;
+	int bytes = 0;
+	int pkts = 0;
 
 	/* walk the not completed tx entries, if any */
 	while (q->head_idx != q->tail_idx) {
 		desc_info = &q->info[q->tail_idx];
+		desc_info->bytes = 0;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 		ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg);
+		if (desc_info->cb_arg) {
+			pkts++;
+			bytes += desc_info->bytes;
+		}
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	}
+
+	if (pkts && bytes)
+		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
 }
 
 static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
@@ -756,50 +825,33 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc
 	desc->hdr_len = cpu_to_le16(hdrlen);
 	desc->mss = cpu_to_le16(mss);
 
-	if (done) {
+	if (start) {
 		skb_tx_timestamp(skb);
 		netdev_tx_sent_queue(q_to_ndq(q), skb->len);
-		ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+		ionic_txq_post(q, false, ionic_tx_clean, skb);
 	} else {
-		ionic_txq_post(q, false, ionic_tx_clean, NULL);
+		ionic_txq_post(q, done, NULL, NULL);
 	}
 }
 
-static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
-						struct ionic_txq_sg_elem **elem)
-{
-	struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
-
-	*elem = sg_desc->elems;
-	return desc;
-}
-
 static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct ionic_desc_info *rewind_desc_info;
-	struct device *dev = q->lif->ionic->dev;
+	struct ionic_desc_info *desc_info;
+	struct ionic_buf_info *buf_info;
 	struct ionic_txq_sg_elem *elem;
 	struct ionic_txq_desc *desc;
-	unsigned int frag_left = 0;
-	unsigned int offset = 0;
-	u16 abort = q->head_idx;
-	unsigned int len_left;
+	unsigned int chunk_len;
+	unsigned int frag_rem;
+	unsigned int tso_rem;
+	unsigned int seg_rem;
 	dma_addr_t desc_addr;
+	dma_addr_t frag_addr;
 	unsigned int hdrlen;
-	unsigned int nfrags;
-	unsigned int seglen;
-	u64 total_bytes = 0;
-	u64 total_pkts = 0;
-	u16 rewind = abort;
-	unsigned int left;
 	unsigned int len;
 	unsigned int mss;
-	skb_frag_t *frag;
 	bool start, done;
 	bool outer_csum;
-	dma_addr_t addr;
 	bool has_vlan;
 	u16 desc_len;
 	u8 desc_nsge;
@@ -807,9 +859,14 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	bool encap;
 	int err;
 
+	desc_info = &q->info[q->head_idx];
+	buf_info = desc_info->bufs;
+
+	if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
+		return -EIO;
+
+	len = skb->len;
 	mss = skb_shinfo(skb)->gso_size;
-	nfrags = skb_shinfo(skb)->nr_frags;
-	len_left = skb->len - skb_headlen(skb);
 	outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) ||
 		     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
 	has_vlan = !!skb_vlan_tag_present(skb);
@@ -834,125 +891,75 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	else
 		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
-	seglen = hdrlen + mss;
-	left = skb_headlen(skb);
+	tso_rem = len;
+	seg_rem = min(tso_rem, hdrlen + mss);
 
-	desc = ionic_tx_tso_next(q, &elem);
-	start = true;
+	frag_addr = 0;
+	frag_rem = 0;
 
-	/* Chop skb->data up into desc segments */
+	start = true;
 
-	while (left > 0) {
-		len = min(seglen, left);
-		frag_left = seglen - len;
-		desc_addr = ionic_tx_map_single(q, skb->data + offset, len);
-		if (dma_mapping_error(dev, desc_addr))
-			goto err_out_abort;
-		desc_len = len;
+	while (tso_rem > 0) {
+		desc = NULL;
+		elem = NULL;
+		desc_addr = 0;
+		desc_len = 0;
 		desc_nsge = 0;
-		left -= len;
-		offset += len;
-		if (nfrags > 0 && frag_left > 0)
-			continue;
-		done = (nfrags == 0 && left == 0);
-		ionic_tx_tso_post(q, desc, skb,
-				  desc_addr, desc_nsge, desc_len,
-				  hdrlen, mss,
-				  outer_csum,
-				  vlan_tci, has_vlan,
-				  start, done);
-		total_pkts++;
-		total_bytes += start ? len : len + hdrlen;
-		desc = ionic_tx_tso_next(q, &elem);
-		start = false;
-		seglen = mss;
-	}
-
-	/* Chop skb frags into desc segments */
-
-	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
-		offset = 0;
-		left = skb_frag_size(frag);
-		len_left -= left;
-		nfrags--;
-		stats->frags++;
-
-		while (left > 0) {
-			if (frag_left > 0) {
-				len = min(frag_left, left);
-				frag_left -= len;
-				addr = ionic_tx_map_frag(q, frag, offset, len);
-				if (dma_mapping_error(dev, addr))
-					goto err_out_abort;
-				elem->addr = cpu_to_le64(addr);
-				elem->len = cpu_to_le16(len);
+		/* use fragments until we have enough to post a single descriptor */
+		while (seg_rem > 0) {
+			/* if the fragment is exhausted then move to the next one */
+			if (frag_rem == 0) {
+				/* grab the next fragment */
+				frag_addr = buf_info->dma_addr;
+				frag_rem = buf_info->len;
+				buf_info++;
+			}
+			chunk_len = min(frag_rem, seg_rem);
+			if (!desc) {
+				/* fill main descriptor */
+				desc = desc_info->txq_desc;
+				elem = desc_info->txq_sg_desc->elems;
+				desc_addr = frag_addr;
+				desc_len = chunk_len;
+			} else {
+				/* fill sg descriptor */
+				elem->addr = cpu_to_le64(frag_addr);
+				elem->len = cpu_to_le16(chunk_len);
 				elem++;
 				desc_nsge++;
-				left -= len;
-				offset += len;
-				if (nfrags > 0 && frag_left > 0)
-					continue;
-				done = (nfrags == 0 && left == 0);
-				ionic_tx_tso_post(q, desc, skb, desc_addr,
-						  desc_nsge, desc_len,
-						  hdrlen, mss, outer_csum,
-						  vlan_tci, has_vlan,
-						  start, done);
-				total_pkts++;
-				total_bytes += start ? len : len + hdrlen;
-				desc = ionic_tx_tso_next(q, &elem);
-				start = false;
-			} else {
-				len = min(mss, left);
-				frag_left = mss - len;
-				desc_addr = ionic_tx_map_frag(q, frag,
-							      offset, len);
-				if (dma_mapping_error(dev, desc_addr))
-					goto err_out_abort;
-				desc_len = len;
-				desc_nsge = 0;
-				left -= len;
-				offset += len;
-				if (nfrags > 0 && frag_left > 0)
-					continue;
-				done = (nfrags == 0 && left == 0);
-				ionic_tx_tso_post(q, desc, skb, desc_addr,
-						  desc_nsge, desc_len,
-						  hdrlen, mss, outer_csum,
-						  vlan_tci, has_vlan,
-						  start, done);
-				total_pkts++;
-				total_bytes += start ? len : len + hdrlen;
-				desc = ionic_tx_tso_next(q, &elem);
-				start = false;
 			}
+			frag_addr += chunk_len;
+			frag_rem -= chunk_len;
+			tso_rem -= chunk_len;
+			seg_rem -= chunk_len;
 		}
+		seg_rem = min(tso_rem, mss);
+		done = (tso_rem == 0);
+		/* post descriptor */
+		ionic_tx_tso_post(q, desc, skb,
+				  desc_addr, desc_nsge, desc_len,
+				  hdrlen, mss, outer_csum, vlan_tci, has_vlan,
+				  start, done);
+		start = false;
+		/* Buffer information is stored with the first tso descriptor */
+		desc_info = &q->info[q->head_idx];
+		desc_info->nbufs = 0;
 	}
 
-	stats->pkts += total_pkts;
-	stats->bytes += total_bytes;
+	stats->pkts += DIV_ROUND_UP(len - hdrlen, mss);
+	stats->bytes += len;
 	stats->tso++;
-	stats->tso_bytes += total_bytes;
+	stats->tso_bytes = len;
 
 	return 0;
-
-err_out_abort:
-	while (rewind != q->head_idx) {
-		rewind_desc_info = &q->info[rewind];
-		ionic_tx_clean(q, rewind_desc_info, NULL, NULL);
-		rewind = (rewind + 1) & (q->num_descs - 1);
-	}
-	q->head_idx = abort;
-
-	return -ENOMEM;
 }
 
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+			      struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
+	struct ionic_txq_desc *desc = desc_info->txq_desc;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
-	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
 	bool encap;
@@ -961,23 +968,22 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 	has_vlan = !!skb_vlan_tag_present(skb);
 	encap = skb->encapsulation;
 
-	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
-	if (dma_mapping_error(dev, dma_addr))
-		return -ENOMEM;
-
 	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
 	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
 
 	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL,
-				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+				  flags, skb_shinfo(skb)->nr_frags,
+				  buf_info->dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
-	desc->len = cpu_to_le16(skb_headlen(skb));
-	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
-	desc->csum_offset = cpu_to_le16(skb->csum_offset);
+	desc->len = cpu_to_le16(buf_info->len);
 	if (has_vlan) {
 		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
 		stats->vlan_inserted++;
+	} else {
+		desc->vlan_tci = 0;
 	}
+	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
+	desc->csum_offset = cpu_to_le16(skb->csum_offset);
 
 	if (skb_csum_is_sctp(skb))
 		stats->crc32_csum++;
@@ -987,12 +993,12 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 	return 0;
 }
 
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+				 struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
+	struct ionic_txq_desc *desc = desc_info->txq_desc;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
-	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
 	bool encap;
@@ -1001,67 +1007,66 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
 	has_vlan = !!skb_vlan_tag_present(skb);
 	encap = skb->encapsulation;
 
-	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
-	if (dma_mapping_error(dev, dma_addr))
-		return -ENOMEM;
-
 	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
 	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
 
 	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
-				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+				  flags, skb_shinfo(skb)->nr_frags,
+				  buf_info->dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
-	desc->len = cpu_to_le16(skb_headlen(skb));
+	desc->len = cpu_to_le16(buf_info->len);
 	if (has_vlan) {
 		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
 		stats->vlan_inserted++;
+	} else {
+		desc->vlan_tci = 0;
 	}
+	desc->csum_start = 0;
+	desc->csum_offset = 0;
 
 	stats->csum_none++;
 
 	return 0;
 }
 
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+			      struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
-	unsigned int len_left = skb->len - skb_headlen(skb);
+	struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
+	struct ionic_buf_info *buf_info = &desc_info->bufs[1];
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
-	dma_addr_t dma_addr;
-	skb_frag_t *frag;
-	u16 len;
+	unsigned int i;
 
-	for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) {
-		len = skb_frag_size(frag);
-		elem->len = cpu_to_le16(len);
-		dma_addr = ionic_tx_map_frag(q, frag, 0, len);
-		if (dma_mapping_error(dev, dma_addr))
-			return -ENOMEM;
-		elem->addr = cpu_to_le64(dma_addr);
-		len_left -= len;
-		stats->frags++;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) {
+		elem->addr = cpu_to_le64(buf_info->dma_addr);
+		elem->len = cpu_to_le16(buf_info->len);
 	}
 
+	stats->frags += skb_shinfo(skb)->nr_frags;
+
 	return 0;
 }
 
 static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 {
+	struct ionic_desc_info *desc_info = &q->info[q->head_idx];
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int err;
 
+	if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
+		return -EIO;
+
 	/* set up the initial descriptor */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		err = ionic_tx_calc_csum(q, skb);
+		err = ionic_tx_calc_csum(q, skb, desc_info);
 	else
-		err = ionic_tx_calc_no_csum(q, skb);
+		err = ionic_tx_calc_no_csum(q, skb, desc_info);
 	if (err)
 		return err;
 
 	/* add frags */
-	err = ionic_tx_skb_frags(q, skb);
+	err = ionic_tx_skb_frags(q, skb, desc_info);
 	if (err)
 		return err;
 
@@ -1077,7 +1082,6 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 
 static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
-	int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int ndescs;
 	int err;
@@ -1088,7 +1092,8 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 	else
 		ndescs = 1;
 
-	if (skb_shinfo(skb)->nr_frags <= sg_elems)
+	/* If non-TSO, just need 1 desc and nr_frags sg elems */
+	if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems)
 		return ndescs;
 
 	/* Too many frags, so linearize */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 07824bf9d68d..dfaf10edfabf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -396,6 +396,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 		tpa_param->tpa_ipv6_en_flg = 1;
 		tpa_param->tpa_pkt_split_flg = 1;
 		tpa_param->tpa_gro_consistent_flg = 1;
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index b8dc5c4591ef..ed2b6fe5a78d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4734,6 +4734,7 @@ void qed_inform_vf_link_state(struct qed_hwfn *hwfn)
 			 */
 			link.speed = (hwfn->cdev->num_hwfns > 1) ?
 				     100000 : 40000;
+			break;
 		default:
 			/* In auto mode pass PF link image to VF */
 			break;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 3d00b3232308..0be5ac7ab261 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -56,20 +56,22 @@ static void
 __rmnet_map_ingress_handler(struct sk_buff *skb,
 			    struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_endpoint *ep;
 	u16 len, pad;
 	u8 mux_id;
 
-	if (RMNET_MAP_GET_CD_BIT(skb)) {
+	if (map_header->flags & MAP_CMD_FLAG) {
+		/* Packet contains a MAP command (not data) */
 		if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
 
 		goto free_skb;
 	}
 
-	mux_id = RMNET_MAP_GET_MUX_ID(skb);
-	pad = RMNET_MAP_GET_PAD(skb);
-	len = RMNET_MAP_GET_LENGTH(skb) - pad;
+	mux_id = map_header->mux_id;
+	pad = map_header->flags & MAP_PAD_LEN_MASK;
+	len = ntohs(map_header->pkt_len) - pad;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP)
 		goto free_skb;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 576501db2a0b..2aea153f4247 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -32,18 +32,6 @@ enum rmnet_map_commands {
 	RMNET_MAP_COMMAND_ENUM_LENGTH
 };
 
-#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
-				 (Y)->data)->mux_id)
-#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
-				(Y)->data)->cd_bit)
-#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \
-				(Y)->data)->pad_len)
-#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \
-				    ((Y)->data + \
-				      sizeof(struct rmnet_map_header)))
-#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \
-					(Y)->data)->pkt_len))
-
 #define RMNET_MAP_COMMAND_REQUEST     0
 #define RMNET_MAP_COMMAND_ACK         1
 #define RMNET_MAP_COMMAND_UNSUPPORTED 2
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index beaee4962128..add0f5ade2e6 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -12,12 +12,13 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
 				    struct rmnet_port *port,
 				    int enable)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_endpoint *ep;
 	struct net_device *vnd;
 	u8 mux_id;
 	int r;
 
-	mux_id = RMNET_MAP_GET_MUX_ID(skb);
+	mux_id = map_header->mux_id;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
 		kfree_skb(skb);
@@ -49,6 +50,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 			       unsigned char type,
 			       struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_map_control_command *cmd;
 	struct net_device *dev = skb->dev;
 
@@ -58,7 +60,8 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 
 	skb->protocol = htons(ETH_P_MAP);
 
-	cmd = RMNET_MAP_GET_CMD_START(skb);
+	/* Command data immediately follows the MAP header */
+	cmd = (struct rmnet_map_control_command *)(map_header + 1);
 	cmd->cmd_type = type & 0x03;
 
 	netif_tx_lock(dev);
@@ -71,11 +74,13 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
  */
 void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_map_control_command *cmd;
 	unsigned char command_name;
 	unsigned char rc = 0;
 
-	cmd = RMNET_MAP_GET_CMD_START(skb);
+	/* Command data immediately follows the MAP header */
+	cmd = (struct rmnet_map_control_command *)(map_header + 1);
 	command_name = cmd->command_name;
 
 	switch (command_name) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 21d38167f961..0ac2ff828320 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -197,22 +197,16 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct iphdr *ip4h = (struct iphdr *)iphdr;
-	__be16 *hdr = (__be16 *)ul_header, offset;
+	struct iphdr *ip4h = iphdr;
+	u16 val;
 
-	offset = htons((__force u16)(skb_transport_header(skb) -
-				     (unsigned char *)iphdr));
-	ul_header->csum_start_offset = offset;
-	ul_header->csum_insert_offset = skb->csum_offset;
-	ul_header->csum_enabled = 1;
+	val = MAP_CSUM_UL_ENABLED_FLAG;
 	if (ip4h->protocol == IPPROTO_UDP)
-		ul_header->udp_ind = 1;
-	else
-		ul_header->udp_ind = 0;
+		val |= MAP_CSUM_UL_UDP_FLAG;
+	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
-	/* Changing remaining fields to network order */
-	hdr++;
-	*hdr = htons((__force u16)*hdr);
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
+	ul_header->csum_info = htons(val);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -239,23 +233,16 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
-	__be16 *hdr = (__be16 *)ul_header, offset;
-
-	offset = htons((__force u16)(skb_transport_header(skb) -
-				     (unsigned char *)ip6hdr));
-	ul_header->csum_start_offset = offset;
-	ul_header->csum_insert_offset = skb->csum_offset;
-	ul_header->csum_enabled = 1;
+	struct ipv6hdr *ip6h = ip6hdr;
+	u16 val;
 
+	val = MAP_CSUM_UL_ENABLED_FLAG;
 	if (ip6h->nexthdr == IPPROTO_UDP)
-		ul_header->udp_ind = 1;
-	else
-		ul_header->udp_ind = 0;
+		val |= MAP_CSUM_UL_UDP_FLAG;
+	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
-	/* Changing remaining fields to network order */
-	hdr++;
-	*hdr = htons((__force u16)*hdr);
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
+	ul_header->csum_info = htons(val);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -284,6 +271,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 		return map_header;
 	}
 
+	BUILD_BUG_ON(MAP_PAD_LEN_MASK < 3);
 	padding = ALIGN(map_datalen, 4) - map_datalen;
 
 	if (padding == 0)
@@ -297,7 +285,8 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 
 done:
 	map_header->pkt_len = htons(map_datalen + padding);
-	map_header->pad_len = padding & 0x3F;
+	/* This is a data packet, so the CMD bit is 0 */
+	map_header->flags = padding & MAP_PAD_LEN_MASK;
 
 	return map_header;
 }
@@ -319,7 +308,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 		return NULL;
 
 	maph = (struct rmnet_map_header *)skb->data;
-	packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
+	packet_len = ntohs(maph->pkt_len) + sizeof(*maph);
 
 	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
 		packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
@@ -328,7 +317,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 		return NULL;
 
 	/* Some hardware can send us empty frames. Catch them */
-	if (ntohs(maph->pkt_len) == 0)
+	if (!maph->pkt_len)
 		return NULL;
 
 	skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC);
@@ -361,7 +350,7 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 
 	csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len);
 
-	if (!csum_trailer->valid) {
+	if (!(csum_trailer->flags & MAP_CSUM_DL_VALID_FLAG)) {
 		priv->stats.csum_valid_unset++;
 		return -EINVAL;
 	}
@@ -421,10 +410,7 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
 	}
 
 sw_csum:
-	ul_header->csum_start_offset = 0;
-	ul_header->csum_insert_offset = 0;
-	ul_header->csum_enabled = 0;
-	ul_header->udp_ind = 0;
+	memset(ul_header, 0, sizeof(*ul_header));
 
 	priv->stats.csum_sw++;
 }
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 581a92fc3292..7a8bb7e833f3 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1586,12 +1586,10 @@ DECLARE_RTL_COND(rtl_counters_cond)
 
 static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 {
-	dma_addr_t paddr = tp->counters_phys_addr;
-	u32 cmd;
+	u32 cmd = lower_32_bits(tp->counters_phys_addr);
 
-	RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+	RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr));
 	rtl_pci_commit(tp);
-	cmd = (u64)paddr & DMA_BIT_MASK(32);
 	RTL_W32(tp, CounterAddrLow, cmd);
 	RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
@@ -1903,6 +1901,15 @@ static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data)
 	return ret;
 }
 
+static void rtl8169_get_ringparam(struct net_device *dev,
+				  struct ethtool_ringparam *data)
+{
+	data->rx_max_pending = NUM_RX_DESC;
+	data->rx_pending = NUM_RX_DESC;
+	data->tx_max_pending = NUM_TX_DESC;
+	data->tx_pending = NUM_TX_DESC;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES,
@@ -1923,6 +1930,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.set_eee		= rtl8169_set_eee,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_ringparam		= rtl8169_get_ringparam,
 };
 
 static void rtl_enable_eee(struct rtl8169_private *tp)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f029c7c03804..ebedb1a11132 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -560,7 +560,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 			  EESR_TDE,
 	.fdr_value	= 0x0000070f,
 
-	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+	.trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE,
 
 	.no_psr		= 1,
 	.apr		= 1,
@@ -701,7 +701,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
 			  EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
 	.fdr_value	= 0x00000f0f,
 
-	.trscer_err_mask = DESC_I_RINT8,
+	.trscer_err_mask = TRSCER_RMAFCE,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -782,7 +782,7 @@ static struct sh_eth_cpu_data r7s9210_data = {
 
 	.fdr_value	= 0x0000070f,
 
-	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+	.trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -1094,7 +1094,7 @@ static struct sh_eth_cpu_data sh771x_data = {
 			  EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
 			  EESIPR_PREIP | EESIPR_CERFIP,
 
-	.trscer_err_mask = DESC_I_RINT8,
+	.trscer_err_mask = TRSCER_RMAFCE,
 
 	.tsu		= 1,
 	.dual_port	= 1,
@@ -1749,7 +1749,7 @@ static void sh_eth_emac_interrupt(struct net_device *ndev)
 		link_stat = sh_eth_read(ndev, PSR);
 		if (mdp->ether_link_active_low)
 			link_stat = ~link_stat;
-		if (!(link_stat & PHY_ST_LINK)) {
+		if (!(link_stat & PSR_LMON)) {
 			sh_eth_rcv_snd_disable(ndev);
 		} else {
 			/* Link Up */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index c1b3751b12c4..a5c07c6ff44a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -171,7 +171,7 @@ enum GECMR_BIT {
 };
 
 /* EDMR */
-enum DMAC_M_BIT {
+enum EDMR_BIT {
 	EDMR_NBST = 0x80,
 	EDMR_EL = 0x40, /* Litte endian */
 	EDMR_DL1 = 0x20, EDMR_DL0 = 0x10,
@@ -180,13 +180,13 @@ enum DMAC_M_BIT {
 };
 
 /* EDTRR */
-enum DMAC_T_BIT {
+enum EDTRR_BIT {
 	EDTRR_TRNS_GETHER = 0x03,
 	EDTRR_TRNS_ETHER = 0x01,
 };
 
 /* EDRRR */
-enum EDRRR_R_BIT {
+enum EDRRR_BIT {
 	EDRRR_R = 0x01,
 };
 
@@ -208,7 +208,7 @@ enum PIR_BIT {
 };
 
 /* PSR */
-enum PHY_STATUS_BIT { PHY_ST_LINK = 0x01, };
+enum PSR_BIT { PSR_LMON = 0x01, };
 
 /* EESR */
 enum EESR_BIT {
@@ -288,27 +288,6 @@ enum EESIPR_BIT {
 	EESIPR_CERFIP	= 0x00000001,
 };
 
-/* Receive descriptor 0 bits */
-enum RD_STS_BIT {
-	RD_RACT = 0x80000000, RD_RDLE = 0x40000000,
-	RD_RFP1 = 0x20000000, RD_RFP0 = 0x10000000,
-	RD_RFE = 0x08000000, RD_RFS10 = 0x00000200,
-	RD_RFS9 = 0x00000100, RD_RFS8 = 0x00000080,
-	RD_RFS7 = 0x00000040, RD_RFS6 = 0x00000020,
-	RD_RFS5 = 0x00000010, RD_RFS4 = 0x00000008,
-	RD_RFS3 = 0x00000004, RD_RFS2 = 0x00000002,
-	RD_RFS1 = 0x00000001,
-};
-#define RDF1ST	RD_RFP1
-#define RDFEND	RD_RFP0
-#define RD_RFP	(RD_RFP1|RD_RFP0)
-
-/* Receive descriptor 1 bits */
-enum RD_LEN_BIT {
-	RD_RFL	= 0x0000ffff,	/* receive frame  length */
-	RD_RBL	= 0xffff0000,	/* receive buffer length */
-};
-
 /* FCFTR */
 enum FCFTR_BIT {
 	FCFTR_RFF2 = 0x00040000, FCFTR_RFF1 = 0x00020000,
@@ -318,28 +297,13 @@ enum FCFTR_BIT {
 #define DEFAULT_FIFO_F_D_RFF	(FCFTR_RFF2 | FCFTR_RFF1 | FCFTR_RFF0)
 #define DEFAULT_FIFO_F_D_RFD	(FCFTR_RFD2 | FCFTR_RFD1 | FCFTR_RFD0)
 
-/* Transmit descriptor 0 bits */
-enum TD_STS_BIT {
-	TD_TACT = 0x80000000, TD_TDLE = 0x40000000,
-	TD_TFP1 = 0x20000000, TD_TFP0 = 0x10000000,
-	TD_TFE  = 0x08000000, TD_TWBI = 0x04000000,
-};
-#define TDF1ST	TD_TFP1
-#define TDFEND	TD_TFP0
-#define TD_TFP	(TD_TFP1|TD_TFP0)
-
-/* Transmit descriptor 1 bits */
-enum TD_LEN_BIT {
-	TD_TBL	= 0xffff0000,	/* transmit buffer length */
-};
-
 /* RMCR */
 enum RMCR_BIT {
 	RMCR_RNC = 0x00000001,
 };
 
 /* ECMR */
-enum FELIC_MODE_BIT {
+enum ECMR_BIT {
 	ECMR_TRCCM = 0x04000000, ECMR_RCSC = 0x00800000,
 	ECMR_DPAD = 0x00200000, ECMR_RZPF = 0x00100000,
 	ECMR_ZPF = 0x00080000, ECMR_PFR = 0x00040000, ECMR_RXF = 0x00020000,
@@ -350,7 +314,7 @@ enum FELIC_MODE_BIT {
 };
 
 /* ECSR */
-enum ECSR_STATUS_BIT {
+enum ECSR_BIT {
 	ECSR_BRCRX = 0x20, ECSR_PSRTO = 0x10,
 	ECSR_LCHNG = 0x04,
 	ECSR_MPD = 0x02, ECSR_ICD = 0x01,
@@ -360,7 +324,7 @@ enum ECSR_STATUS_BIT {
 				 ECSR_ICD | ECSIPR_MPDIP)
 
 /* ECSIPR */
-enum ECSIPR_STATUS_MASK_BIT {
+enum ECSIPR_BIT {
 	ECSIPR_BRCRXIP = 0x20, ECSIPR_PSRTOIP = 0x10,
 	ECSIPR_LCHNGIP = 0x04,
 	ECSIPR_MPDIP = 0x02, ECSIPR_ICDIP = 0x01,
@@ -380,14 +344,20 @@ enum MPR_BIT {
 };
 
 /* TRSCER */
-enum DESC_I_BIT {
-	DESC_I_TINT4 = 0x0800, DESC_I_TINT3 = 0x0400, DESC_I_TINT2 = 0x0200,
-	DESC_I_TINT1 = 0x0100, DESC_I_RINT8 = 0x0080, DESC_I_RINT5 = 0x0010,
-	DESC_I_RINT4 = 0x0008, DESC_I_RINT3 = 0x0004, DESC_I_RINT2 = 0x0002,
-	DESC_I_RINT1 = 0x0001,
+enum TRSCER_BIT {
+	TRSCER_CNDCE	= 0x00000800,
+	TRSCER_DLCCE	= 0x00000400,
+	TRSCER_CDCE	= 0x00000200,
+	TRSCER_TROCE	= 0x00000100,
+	TRSCER_RMAFCE	= 0x00000080,
+	TRSCER_RRFCE	= 0x00000010,
+	TRSCER_RTLFCE	= 0x00000008,
+	TRSCER_RTSFCE	= 0x00000004,
+	TRSCER_PRECE	= 0x00000002,
+	TRSCER_CERFCE	= 0x00000001,
 };
 
-#define DEFAULT_TRSCER_ERR_MASK (DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2)
+#define DEFAULT_TRSCER_ERR_MASK (TRSCER_RMAFCE | TRSCER_RRFCE | TRSCER_CDCE)
 
 /* RPADIR */
 enum RPADIR_BIT {
@@ -445,6 +415,24 @@ struct sh_eth_txdesc {
 	u32 pad0;		/* padding data */
 } __aligned(2) __packed;
 
+/* Transmit descriptor 0 bits */
+enum TD_STS_BIT {
+	TD_TACT	= 0x80000000,
+	TD_TDLE	= 0x40000000,
+	TD_TFP1	= 0x20000000,
+	TD_TFP0	= 0x10000000,
+	TD_TFE	= 0x08000000,
+	TD_TWBI	= 0x04000000,
+};
+#define TDF1ST	TD_TFP1
+#define TDFEND	TD_TFP0
+#define TD_TFP	(TD_TFP1 | TD_TFP0)
+
+/* Transmit descriptor 1 bits */
+enum TD_LEN_BIT {
+	TD_TBL	= 0xffff0000,	/* transmit buffer length */
+};
+
 /* The sh ether Rx buffer descriptors.
  * This structure should be 20 bytes.
  */
@@ -455,6 +443,34 @@ struct sh_eth_rxdesc {
 	u32 pad0;		/* padding data */
 } __aligned(2) __packed;
 
+/* Receive descriptor 0 bits */
+enum RD_STS_BIT {
+	RD_RACT	= 0x80000000,
+	RD_RDLE	= 0x40000000,
+	RD_RFP1	= 0x20000000,
+	RD_RFP0	= 0x10000000,
+	RD_RFE	= 0x08000000,
+	RD_RFS10 = 0x00000200,
+	RD_RFS9	= 0x00000100,
+	RD_RFS8	= 0x00000080,
+	RD_RFS7	= 0x00000040,
+	RD_RFS6	= 0x00000020,
+	RD_RFS5	= 0x00000010,
+	RD_RFS4	= 0x00000008,
+	RD_RFS3	= 0x00000004,
+	RD_RFS2	= 0x00000002,
+	RD_RFS1	= 0x00000001,
+};
+#define RDF1ST	RD_RFP1
+#define RDFEND	RD_RFP0
+#define RD_RFP	(RD_RFP1 | RD_RFP0)
+
+/* Receive descriptor 1 bits */
+enum RD_LEN_BIT {
+	RD_RFL	= 0x0000ffff,	/* receive frame  length */
+	RD_RBL	= 0xffff0000,	/* receive buffer length */
+};
+
 /* This structure is used by each CPU dependency handling. */
 struct sh_eth_cpu_data {
 	/* mandatory functions */
diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h
index a529ff395ead..a381cf9ec4f3 100644
--- a/drivers/net/ethernet/sfc/falcon/net_driver.h
+++ b/drivers/net/ethernet/sfc/falcon/net_driver.h
@@ -637,7 +637,7 @@ union ef4_multicast_hash {
  * struct ef4_nic - an Efx NIC
  * @name: Device name (net device name or bus id before net device registered)
  * @pci_dev: The PCI device
- * @node: List node for maintaning primary/secondary function lists
+ * @node: List node for maintaining primary/secondary function lists
  * @primary: &struct ef4_nic instance for the primary function of this
  *	controller.  May be the same structure, and may be %NULL if no
  *	primary function is bound.  Serialised by rtnl_lock.
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 6f271c46368d..d065b11b7b10 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -182,6 +182,12 @@ struct stmmac_extra_stats {
 	/* TSO */
 	unsigned long tx_tso_frames;
 	unsigned long tx_tso_nfrags;
+	/* EST */
+	unsigned long mtl_est_cgce;
+	unsigned long mtl_est_hlbs;
+	unsigned long mtl_est_hlbf;
+	unsigned long mtl_est_btre;
+	unsigned long mtl_est_btrlm;
 };
 
 /* Safety Feature statistics exposed by ethtool */
@@ -309,6 +315,13 @@ enum dma_irq_status {
 #define	CORE_IRQ_RX_PATH_IN_LPI_MODE	(1 << 2)
 #define	CORE_IRQ_RX_PATH_EXIT_LPI_MODE	(1 << 3)
 
+/* FPE defines */
+#define FPE_EVENT_UNKNOWN		0
+#define FPE_EVENT_TRSP			BIT(0)
+#define FPE_EVENT_TVER			BIT(1)
+#define FPE_EVENT_RRSP			BIT(2)
+#define FPE_EVENT_RVER			BIT(3)
+
 #define CORE_IRQ_MTL_RX_OVERFLOW	BIT(8)
 
 /* Physical Coding Sublayer */
@@ -382,6 +395,8 @@ struct dma_features {
 	unsigned int estsel;
 	unsigned int fpesel;
 	unsigned int tbssel;
+	/* Numbers of Auxiliary Snapshot Inputs */
+	unsigned int aux_snapshot_n;
 };
 
 /* RX Buffer size must be multiple of 4/8/16 bytes */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 223f69da7e95..c1a361305a5a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -90,6 +90,32 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 	return ret;
 }
 
+static int imx_dwmac_clks_config(void *priv, bool enabled)
+{
+	struct imx_priv_data *dwmac = priv;
+	int ret = 0;
+
+	if (enabled) {
+		ret = clk_prepare_enable(dwmac->clk_mem);
+		if (ret) {
+			dev_err(dwmac->dev, "mem clock enable failed\n");
+			return ret;
+		}
+
+		ret = clk_prepare_enable(dwmac->clk_tx);
+		if (ret) {
+			dev_err(dwmac->dev, "tx clock enable failed\n");
+			clk_disable_unprepare(dwmac->clk_mem);
+			return ret;
+		}
+	} else {
+		clk_disable_unprepare(dwmac->clk_tx);
+		clk_disable_unprepare(dwmac->clk_mem);
+	}
+
+	return ret;
+}
+
 static int imx_dwmac_init(struct platform_device *pdev, void *priv)
 {
 	struct plat_stmmacenet_data *plat_dat;
@@ -98,39 +124,18 @@ static int imx_dwmac_init(struct platform_device *pdev, void *priv)
 
 	plat_dat = dwmac->plat_dat;
 
-	ret = clk_prepare_enable(dwmac->clk_mem);
-	if (ret) {
-		dev_err(&pdev->dev, "mem clock enable failed\n");
-		return ret;
-	}
-
-	ret = clk_prepare_enable(dwmac->clk_tx);
-	if (ret) {
-		dev_err(&pdev->dev, "tx clock enable failed\n");
-		goto clk_tx_en_failed;
-	}
-
 	if (dwmac->ops->set_intf_mode) {
 		ret = dwmac->ops->set_intf_mode(plat_dat);
 		if (ret)
-			goto intf_mode_failed;
+			return ret;
 	}
 
 	return 0;
-
-intf_mode_failed:
-	clk_disable_unprepare(dwmac->clk_tx);
-clk_tx_en_failed:
-	clk_disable_unprepare(dwmac->clk_mem);
-	return ret;
 }
 
 static void imx_dwmac_exit(struct platform_device *pdev, void *priv)
 {
-	struct imx_priv_data *dwmac = priv;
-
-	clk_disable_unprepare(dwmac->clk_tx);
-	clk_disable_unprepare(dwmac->clk_mem);
+	/* nothing to do now */
 }
 
 static void imx_dwmac_fix_speed(void *priv, unsigned int speed)
@@ -249,10 +254,15 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 	plat_dat->addr64 = dwmac->ops->addr_width;
 	plat_dat->init = imx_dwmac_init;
 	plat_dat->exit = imx_dwmac_exit;
+	plat_dat->clks_config = imx_dwmac_clks_config;
 	plat_dat->fix_mac_speed = imx_dwmac_fix_speed;
 	plat_dat->bsp_priv = dwmac;
 	dwmac->plat_dat = plat_dat;
 
+	ret = imx_dwmac_clks_config(dwmac, true);
+	if (ret)
+		goto err_clks_config;
+
 	ret = imx_dwmac_init(pdev, dwmac);
 	if (ret)
 		goto err_dwmac_init;
@@ -263,9 +273,11 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_dwmac_init:
 err_drv_probe:
 	imx_dwmac_exit(pdev, plat_dat->bsp_priv);
+err_dwmac_init:
+	imx_dwmac_clks_config(dwmac, false);
+err_clks_config:
 err_parse_dt:
 err_match_data:
 	stmmac_remove_config_dt(pdev, plat_dat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 0b64f7710d17..992294d25706 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -8,9 +8,23 @@
 #include "dwmac-intel.h"
 #include "dwmac4.h"
 #include "stmmac.h"
+#include "stmmac_ptp.h"
+
+#define INTEL_MGBE_ADHOC_ADDR	0x15
+#define INTEL_MGBE_XPCS_ADDR	0x16
+
+/* Selection for PTP Clock Freq belongs to PSE & PCH GbE */
+#define PSE_PTP_CLK_FREQ_MASK		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PSE_PTP_CLK_FREQ_200MHZ		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_256MHZ		(0)
+#define PCH_PTP_CLK_FREQ_MASK		(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_200MHZ		(0)
 
 struct intel_priv_data {
 	int mdio_adhoc_addr;	/* mdio address for serdes & etc */
+	bool is_pse;
 };
 
 /* This struct is used to associate PCI Function of MAC controller on a board,
@@ -201,6 +215,134 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	}
 }
 
+/* Program PTP Clock Frequency for different variant of
+ * Intel mGBE that has slightly different GPO mapping
+ */
+static void intel_mgbe_ptp_clk_freq_config(void *npriv)
+{
+	struct stmmac_priv *priv = (struct stmmac_priv *)npriv;
+	struct intel_priv_data *intel_priv;
+	u32 gpio_value;
+
+	intel_priv = (struct intel_priv_data *)priv->plat->bsp_priv;
+
+	gpio_value = readl(priv->ioaddr + GMAC_GPIO_STATUS);
+
+	if (intel_priv->is_pse) {
+		/* For PSE GbE, use 200MHz */
+		gpio_value &= ~PSE_PTP_CLK_FREQ_MASK;
+		gpio_value |= PSE_PTP_CLK_FREQ_200MHZ;
+	} else {
+		/* For PCH GbE, use 200MHz */
+		gpio_value &= ~PCH_PTP_CLK_FREQ_MASK;
+		gpio_value |= PCH_PTP_CLK_FREQ_200MHZ;
+	}
+
+	writel(gpio_value, priv->ioaddr + GMAC_GPIO_STATUS);
+}
+
+static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
+			u64 *art_time)
+{
+	u64 ns;
+
+	ns = mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE3);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE2);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE1);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE0);
+
+	*art_time = ns;
+}
+
+static int intel_crosststamp(ktime_t *device,
+			     struct system_counterval_t *system,
+			     void *ctx)
+{
+	struct intel_priv_data *intel_priv;
+
+	struct stmmac_priv *priv = (struct stmmac_priv *)ctx;
+	void __iomem *ptpaddr = priv->ptpaddr;
+	void __iomem *ioaddr = priv->hw->pcsr;
+	unsigned long flags;
+	u64 art_time = 0;
+	u64 ptp_time = 0;
+	u32 num_snapshot;
+	u32 gpio_value;
+	u32 acr_value;
+	int ret;
+	u32 v;
+	int i;
+
+	if (!boot_cpu_has(X86_FEATURE_ART))
+		return -EOPNOTSUPP;
+
+	intel_priv = priv->plat->bsp_priv;
+
+	/* Enable Internal snapshot trigger */
+	acr_value = readl(ptpaddr + PTP_ACR);
+	acr_value &= ~PTP_ACR_MASK;
+	switch (priv->plat->int_snapshot_num) {
+	case AUX_SNAPSHOT0:
+		acr_value |= PTP_ACR_ATSEN0;
+		break;
+	case AUX_SNAPSHOT1:
+		acr_value |= PTP_ACR_ATSEN1;
+		break;
+	case AUX_SNAPSHOT2:
+		acr_value |= PTP_ACR_ATSEN2;
+		break;
+	case AUX_SNAPSHOT3:
+		acr_value |= PTP_ACR_ATSEN3;
+		break;
+	default:
+		return -EINVAL;
+	}
+	writel(acr_value, ptpaddr + PTP_ACR);
+
+	/* Clear FIFO */
+	acr_value = readl(ptpaddr + PTP_ACR);
+	acr_value |= PTP_ACR_ATSFC;
+	writel(acr_value, ptpaddr + PTP_ACR);
+
+	/* Trigger Internal snapshot signal
+	 * Create a rising edge by just toggle the GPO1 to low
+	 * and back to high.
+	 */
+	gpio_value = readl(ioaddr + GMAC_GPIO_STATUS);
+	gpio_value &= ~GMAC_GPO1;
+	writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
+	gpio_value |= GMAC_GPO1;
+	writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
+
+	/* Poll for time sync operation done */
+	ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
+				 (v & GMAC_INT_TSIE), 100, 10000);
+
+	if (ret == -ETIMEDOUT) {
+		pr_err("%s: Wait for time sync operation timeout\n", __func__);
+		return ret;
+	}
+
+	num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
+			GMAC_TIMESTAMP_ATSNS_MASK) >>
+			GMAC_TIMESTAMP_ATSNS_SHIFT;
+
+	/* Repeat until the timestamps are from the FIFO last segment */
+	for (i = 0; i < num_snapshot; i++) {
+		spin_lock_irqsave(&priv->ptp_lock, flags);
+		stmmac_get_ptptime(priv, ptpaddr, &ptp_time);
+		*device = ns_to_ktime(ptp_time);
+		spin_unlock_irqrestore(&priv->ptp_lock, flags);
+		get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time);
+		*system = convert_art_to_tsc(art_time);
+	}
+
+	return 0;
+}
+
 static void common_default_data(struct plat_stmmacenet_data *plat)
 {
 	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
@@ -319,6 +461,8 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 		return ret;
 	}
 
+	plat->ptp_clk_freq_config = intel_mgbe_ptp_clk_freq_config;
+
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
 
@@ -333,6 +477,21 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	/* Use the last Rx queue */
 	plat->vlan_fail_q = plat->rx_queues_to_use - 1;
 
+	/* Intel mgbe SGMII interface uses pcs-xcps */
+	if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+		plat->mdio_bus_data->has_xpcs = true;
+		plat->mdio_bus_data->xpcs_an_inband = true;
+	}
+
+	/* Ensure mdio bus scan skips intel serdes and pcs-xpcs */
+	plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR;
+	plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;
+
+	plat->int_snapshot_num = AUX_SNAPSHOT1;
+
+	plat->has_crossts = true;
+	plat->crosststamp = intel_crosststamp;
+
 	return 0;
 }
 
@@ -378,8 +537,12 @@ static struct stmmac_pci_info ehl_rgmii1g_info = {
 static int ehl_pse0_common_data(struct pci_dev *pdev,
 				struct plat_stmmacenet_data *plat)
 {
+	struct intel_priv_data *intel_priv = plat->bsp_priv;
+
+	intel_priv->is_pse = true;
 	plat->bus_id = 2;
 	plat->addr64 = 32;
+
 	return ehl_common_data(pdev, plat);
 }
 
@@ -410,8 +573,12 @@ static struct stmmac_pci_info ehl_pse0_sgmii1g_info = {
 static int ehl_pse1_common_data(struct pci_dev *pdev,
 				struct plat_stmmacenet_data *plat)
 {
+	struct intel_priv_data *intel_priv = plat->bsp_priv;
+
+	intel_priv->is_pse = true;
 	plat->bus_id = 3;
 	plat->addr64 = 32;
+
 	return ehl_common_data(pdev, plat);
 }
 
@@ -664,7 +831,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 
 	plat->bsp_priv = intel_priv;
-	intel_priv->mdio_adhoc_addr = 0x15;
+	intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR;
 
 	ret = info->setup(pdev, plat);
 	if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index bf3250e0e59c..749585fe6fc9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -352,6 +352,8 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	plat_dat->bsp_priv = gmac;
 	plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed;
 	plat_dat->multicast_filter_bins = 0;
+	plat_dat->tx_fifo_size = 8192;
+	plat_dat->rx_fifo_size = 8192;
 
 	err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (err)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 2bac49b49f73..90383abafa66 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -255,7 +255,7 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr,
 }
 
 static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
-				  u32 number_chan)
+				  u32 queue)
 {
 	writel(riwt, ioaddr + DMA_RX_WATCHDOG);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 82df91c130f7..462ca7ed095a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -42,6 +42,7 @@
 #define GMAC_HW_FEATURE3		0x00000128
 #define GMAC_MDIO_ADDR			0x00000200
 #define GMAC_MDIO_DATA			0x00000204
+#define GMAC_GPIO_STATUS		0x0000020C
 #define GMAC_ARP_ADDR			0x00000210
 #define GMAC_ADDR_HIGH(reg)		(0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)		(0x304 + reg * 8)
@@ -49,6 +50,7 @@
 #define GMAC_L4_ADDR(reg)		(0x904 + (reg) * 0x30)
 #define GMAC_L3_ADDR0(reg)		(0x910 + (reg) * 0x30)
 #define GMAC_L3_ADDR1(reg)		(0x914 + (reg) * 0x30)
+#define GMAC_TIMESTAMP_STATUS		0x00000b20
 
 /* RX Queues Routing */
 #define GMAC_RXQCTRL_AVCPQ_MASK		GENMASK(2, 0)
@@ -143,6 +145,7 @@
 #define GMAC_INT_PCS_PHYIS		BIT(3)
 #define GMAC_INT_PMT_EN			BIT(4)
 #define GMAC_INT_LPI_EN			BIT(5)
+#define GMAC_INT_TSIE			BIT(12)
 
 #define	GMAC_PCS_IRQ_DEFAULT	(GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK |	\
 				 GMAC_INT_PCS_ANE)
@@ -259,6 +262,7 @@ enum power_event {
 #define GMAC_HW_RXFIFOSIZE		GENMASK(4, 0)
 
 /* MAC HW features2 bitmap */
+#define GMAC_HW_FEAT_AUXSNAPNUM		GENMASK(30, 28)
 #define GMAC_HW_FEAT_PPSOUTNUM		GENMASK(26, 24)
 #define GMAC_HW_FEAT_TXCHCNT		GENMASK(21, 18)
 #define GMAC_HW_FEAT_RXCHCNT		GENMASK(15, 12)
@@ -278,6 +282,12 @@ enum power_event {
 #define GMAC_HW_FEAT_DVLAN		BIT(5)
 #define GMAC_HW_FEAT_NRVF		GENMASK(2, 0)
 
+/* GMAC GPIO Status reg */
+#define GMAC_GPO0			BIT(16)
+#define GMAC_GPO1			BIT(17)
+#define GMAC_GPO2			BIT(18)
+#define GMAC_GPO3			BIT(19)
+
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS			GENMASK(18, 16)
 #define GMAC_HI_DCS_SHIFT		16
@@ -298,6 +308,11 @@ enum power_event {
 #define GMAC_L4DP0_SHIFT		16
 #define GMAC_L4SP0			GENMASK(15, 0)
 
+/* MAC Timestamp Status */
+#define GMAC_TIMESTAMP_AUXTSTRIG	BIT(2)
+#define GMAC_TIMESTAMP_ATSNS_MASK	GENMASK(29, 25)
+#define GMAC_TIMESTAMP_ATSNS_SHIFT	25
+
 /*  MTL registers */
 #define MTL_OPERATION_MODE		0x00000c00
 #define MTL_FRPE			BIT(15)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 29f765a246a0..95864f014ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -53,6 +53,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
 	if (hw->pcs)
 		value |= GMAC_PCS_IRQ_DEFAULT;
 
+	/* Enable FPE interrupt */
+	if ((GMAC_HW_FEAT_FPESEL & readl(ioaddr + GMAC_HW_FEATURE3)) >> 26)
+		value |= GMAC_INT_FPE_EN;
+
 	writel(value, ioaddr + GMAC_INT_EN);
 }
 
@@ -1245,6 +1249,8 @@ const struct stmmac_ops dwmac410_ops = {
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
 	.fpe_configure = dwmac5_fpe_configure,
+	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
+	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
@@ -1294,6 +1300,8 @@ const struct stmmac_ops dwmac510_ops = {
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
 	.fpe_configure = dwmac5_fpe_configure,
+	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
+	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 62aa0e95beb7..8954b85eb850 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -210,12 +210,9 @@ static void dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 		_dwmac4_dump_dma_regs(ioaddr, i, reg_space);
 }
 
-static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
+static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
 {
-	u32 chan;
-
-	for (chan = 0; chan < number_chan; chan++)
-		writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(chan));
+	writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(queue));
 }
 
 static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
@@ -415,6 +412,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
 	/* IEEE 1588-2002 */
 	dma_cap->time_stamp = 0;
+	/* Number of Auxiliary Snapshot Inputs */
+	dma_cap->aux_snapshot_n = (hw_cap & GMAC_HW_FEAT_AUXSNAPNUM) >> 28;
 
 	/* MAC HW feature3 */
 	hw_cap = readl(ioaddr + GMAC_HW_FEATURE3);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 8f7ac24545ef..5b010ebfede9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -595,9 +595,95 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 		ctrl &= ~EEST;
 
 	writel(ctrl, ioaddr + MTL_EST_CONTROL);
+
+	/* Configure EST interrupt */
+	if (cfg->enable)
+		ctrl = (IECGCE | IEHS | IEHF | IEBE | IECC);
+	else
+		ctrl = 0;
+
+	writel(ctrl, ioaddr + MTL_EST_INT_EN);
+
 	return 0;
 }
 
+void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
+			  struct stmmac_extra_stats *x, u32 txqcnt)
+{
+	u32 status, value, feqn, hbfq, hbfs, btrl;
+	u32 txqcnt_mask = (1 << txqcnt) - 1;
+
+	status = readl(ioaddr + MTL_EST_STATUS);
+
+	value = (CGCE | HLBS | HLBF | BTRE | SWLC);
+
+	/* Return if there is no error */
+	if (!(status & value))
+		return;
+
+	if (status & CGCE) {
+		/* Clear Interrupt */
+		writel(CGCE, ioaddr + MTL_EST_STATUS);
+
+		x->mtl_est_cgce++;
+	}
+
+	if (status & HLBS) {
+		value = readl(ioaddr + MTL_EST_SCH_ERR);
+		value &= txqcnt_mask;
+
+		x->mtl_est_hlbs++;
+
+		/* Clear Interrupt */
+		writel(value, ioaddr + MTL_EST_SCH_ERR);
+
+		/* Collecting info to shows all the queues that has HLBS
+		 * issue. The only way to clear this is to clear the
+		 * statistic
+		 */
+		if (net_ratelimit())
+			netdev_err(dev, "EST: HLB(sched) Queue 0x%x\n", value);
+	}
+
+	if (status & HLBF) {
+		value = readl(ioaddr + MTL_EST_FRM_SZ_ERR);
+		feqn = value & txqcnt_mask;
+
+		value = readl(ioaddr + MTL_EST_FRM_SZ_CAP);
+		hbfq = (value & SZ_CAP_HBFQ_MASK(txqcnt)) >> SZ_CAP_HBFQ_SHIFT;
+		hbfs = value & SZ_CAP_HBFS_MASK;
+
+		x->mtl_est_hlbf++;
+
+		/* Clear Interrupt */
+		writel(feqn, ioaddr + MTL_EST_FRM_SZ_ERR);
+
+		if (net_ratelimit())
+			netdev_err(dev, "EST: HLB(size) Queue %u Size %u\n",
+				   hbfq, hbfs);
+	}
+
+	if (status & BTRE) {
+		if ((status & BTRL) == BTRL_MAX)
+			x->mtl_est_btrlm++;
+		else
+			x->mtl_est_btre++;
+
+		btrl = (status & BTRL) >> BTRL_SHIFT;
+
+		if (net_ratelimit())
+			netdev_info(dev, "EST: BTR Error Loop Count %u\n",
+				    btrl);
+
+		writel(BTRE, ioaddr + MTL_EST_STATUS);
+	}
+
+	if (status & SWLC) {
+		writel(SWLC, ioaddr + MTL_EST_STATUS);
+		netdev_info(dev, "EST: SWOL has been switched\n");
+	}
+}
+
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable)
 {
@@ -621,3 +707,52 @@ void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 	value |= EFPE;
 	writel(value, ioaddr + MAC_FPE_CTRL_STS);
 }
+
+int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev)
+{
+	u32 value;
+	int status;
+
+	status = FPE_EVENT_UNKNOWN;
+
+	value = readl(ioaddr + MAC_FPE_CTRL_STS);
+
+	if (value & TRSP) {
+		status |= FPE_EVENT_TRSP;
+		netdev_info(dev, "FPE: Respond mPacket is transmitted\n");
+	}
+
+	if (value & TVER) {
+		status |= FPE_EVENT_TVER;
+		netdev_info(dev, "FPE: Verify mPacket is transmitted\n");
+	}
+
+	if (value & RRSP) {
+		status |= FPE_EVENT_RRSP;
+		netdev_info(dev, "FPE: Respond mPacket is received\n");
+	}
+
+	if (value & RVER) {
+		status |= FPE_EVENT_RVER;
+		netdev_info(dev, "FPE: Verify mPacket is received\n");
+	}
+
+	return status;
+}
+
+void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type)
+{
+	u32 value;
+
+	value = readl(ioaddr + MAC_FPE_CTRL_STS);
+
+	if (type == MPACKET_VERIFY) {
+		value &= ~SRSP;
+		value |= SVER;
+	} else {
+		value &= ~SVER;
+		value |= SRSP;
+	}
+
+	writel(value, ioaddr + MAC_FPE_CTRL_STS);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 56b0762c1276..ff555d8b0cdf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -12,6 +12,12 @@
 #define TMOUTEN				BIT(0)
 
 #define MAC_FPE_CTRL_STS		0x00000234
+#define TRSP				BIT(19)
+#define TVER				BIT(18)
+#define RRSP				BIT(17)
+#define RVER				BIT(16)
+#define SRSP				BIT(2)
+#define SVER				BIT(1)
 #define EFPE				BIT(0)
 
 #define MAC_PPS_CONTROL			0x00000b70
@@ -38,6 +44,36 @@
 #define PTOV_SHIFT			24
 #define SSWL				BIT(1)
 #define EEST				BIT(0)
+
+#define MTL_EST_STATUS			0x00000c58
+#define BTRL				GENMASK(11, 8)
+#define BTRL_SHIFT			8
+#define BTRL_MAX			(0xF << BTRL_SHIFT)
+#define SWOL				BIT(7)
+#define SWOL_SHIFT			7
+#define CGCE				BIT(4)
+#define HLBS				BIT(3)
+#define HLBF				BIT(2)
+#define BTRE				BIT(1)
+#define SWLC				BIT(0)
+
+#define MTL_EST_SCH_ERR			0x00000c60
+#define MTL_EST_FRM_SZ_ERR		0x00000c64
+#define MTL_EST_FRM_SZ_CAP		0x00000c68
+#define SZ_CAP_HBFS_MASK		GENMASK(14, 0)
+#define SZ_CAP_HBFQ_SHIFT		16
+#define SZ_CAP_HBFQ_MASK(_val)		({ typeof(_val) (val) = (_val);	\
+					((val) > 4 ? GENMASK(18, 16) :	\
+					 (val) > 2 ? GENMASK(17, 16) :	\
+					 BIT(16)); })
+
+#define MTL_EST_INT_EN			0x00000c70
+#define IECGCE				CGCE
+#define IEHS				HLBS
+#define IEHF				HLBF
+#define IEBE				BTRE
+#define IECC				SWLC
+
 #define MTL_EST_GCL_CONTROL		0x00000c80
 #define BTR_LOW				0x0
 #define BTR_HIGH			0x1
@@ -98,6 +134,8 @@
 #define GMAC_RXQCTRL_VFFQ_SHIFT		17
 #define GMAC_RXQCTRL_VFFQE		BIT(16)
 
+#define GMAC_INT_FPE_EN			BIT(17)
+
 int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
 int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		void __iomem *ioaddr, unsigned int asp,
@@ -111,7 +149,12 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
 			   u32 sub_second_inc, u32 systime_flags);
 int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 			 unsigned int ptp_rate);
+void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
+			   struct stmmac_extra_stats *x, u32 txqcnt);
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable);
+void dwmac5_fpe_send_mpacket(void __iomem *ioaddr,
+			     enum stmmac_mpacket_type type);
+int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 77308c5c5d29..f2cab5b76732 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -441,12 +441,9 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
 	dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3;
 }
 
-static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan)
+static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
 {
-	u32 i;
-
-	for (i = 0; i < nchan; i++)
-		writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(i));
+	writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(queue));
 }
 
 static void dwxgmac2_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 979ac9fca23c..45edac5f60db 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -206,7 +206,7 @@ struct stmmac_dma_ops {
 	void (*get_hw_feature)(void __iomem *ioaddr,
 			       struct dma_features *dma_cap);
 	/* Program the HW RX Watchdog */
-	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 queue);
 	void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
 	void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
 	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
@@ -393,8 +393,13 @@ struct stmmac_ops {
 	void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
 	int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg,
 			     unsigned int ptp_rate);
+	void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev,
+			       struct stmmac_extra_stats *x, u32 txqcnt);
 	void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			      bool enable);
+	void (*fpe_send_mpacket)(void __iomem *ioaddr,
+				 enum stmmac_mpacket_type type);
+	int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -491,8 +496,14 @@ struct stmmac_ops {
 	stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
 #define stmmac_est_configure(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, est_configure, __args)
+#define stmmac_est_irq_status(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, est_irq_status, __args)
 #define stmmac_fpe_configure(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, fpe_configure, __args)
+#define stmmac_fpe_send_mpacket(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, fpe_send_mpacket, __args)
+#define stmmac_fpe_irq_status(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, fpe_irq_status, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -504,6 +515,7 @@ struct stmmac_hwtimestamp {
 	int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
 			       int add_sub, int gmac4);
 	void (*get_systime) (void __iomem *ioaddr, u64 *systime);
+	void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time);
 };
 
 #define stmmac_config_hw_tstamping(__priv, __args...) \
@@ -518,6 +530,8 @@ struct stmmac_hwtimestamp {
 	stmmac_do_callback(__priv, ptp, adjust_systime, __args)
 #define stmmac_get_systime(__priv, __args...) \
 	stmmac_do_void_callback(__priv, ptp, get_systime, __args)
+#define stmmac_get_ptptime(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, get_ptptime, __args)
 
 /* Helpers to manage the descriptors for chain and ring modes */
 struct stmmac_mode_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e553b9a1f785..4faad331a4ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -147,9 +147,9 @@ struct stmmac_flow_entry {
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	u32 tx_coal_frames;
-	u32 tx_coal_timer;
-	u32 rx_coal_frames;
+	u32 tx_coal_frames[MTL_MAX_TX_QUEUES];
+	u32 tx_coal_timer[MTL_MAX_TX_QUEUES];
+	u32 rx_coal_frames[MTL_MAX_TX_QUEUES];
 
 	int tx_coalesce;
 	int hwts_tx_en;
@@ -160,7 +160,7 @@ struct stmmac_priv {
 
 	unsigned int dma_buf_sz;
 	unsigned int rx_copybreak;
-	u32 rx_riwt;
+	u32 rx_riwt[MTL_MAX_TX_QUEUES];
 	int hwts_rx_en;
 
 	void __iomem *ioaddr;
@@ -234,6 +234,12 @@ struct stmmac_priv {
 	struct workqueue_struct *wq;
 	struct work_struct service_task;
 
+	/* Workqueue for handling FPE hand-shaking */
+	unsigned long fpe_task_state;
+	struct workqueue_struct *fpe_wq;
+	struct work_struct fpe_task;
+	char wq_name[IFNAMSIZ + 4];
+
 	/* TC Handling */
 	unsigned int tc_entries_max;
 	unsigned int tc_off_max;
@@ -272,6 +278,8 @@ void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
 int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt);
 int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
+int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled);
+void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable);
 
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5642985ef95..61b11639ee0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -158,6 +158,12 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	/* TSO */
 	STMMAC_STAT(tx_tso_frames),
 	STMMAC_STAT(tx_tso_nfrags),
+	/* EST */
+	STMMAC_STAT(mtl_est_cgce),
+	STMMAC_STAT(mtl_est_hlbs),
+	STMMAC_STAT(mtl_est_hlbf),
+	STMMAC_STAT(mtl_est_btre),
+	STMMAC_STAT(mtl_est_btrlm),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
@@ -756,28 +762,75 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv)
 	return (riwt * 256) / (clk / 1000000);
 }
 
-static int stmmac_get_coalesce(struct net_device *dev,
-			       struct ethtool_coalesce *ec)
+static int __stmmac_get_coalesce(struct net_device *dev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 max_cnt;
+	u32 rx_cnt;
+	u32 tx_cnt;
 
-	ec->tx_coalesce_usecs = priv->tx_coal_timer;
-	ec->tx_max_coalesced_frames = priv->tx_coal_frames;
+	rx_cnt = priv->plat->rx_queues_to_use;
+	tx_cnt = priv->plat->tx_queues_to_use;
+	max_cnt = max(rx_cnt, tx_cnt);
 
-	if (priv->use_riwt) {
-		ec->rx_max_coalesced_frames = priv->rx_coal_frames;
-		ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv);
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= max_cnt)
+		return -EINVAL;
+
+	if (queue < tx_cnt) {
+		ec->tx_coalesce_usecs = priv->tx_coal_timer[queue];
+		ec->tx_max_coalesced_frames = priv->tx_coal_frames[queue];
+	} else {
+		ec->tx_coalesce_usecs = 0;
+		ec->tx_max_coalesced_frames = 0;
+	}
+
+	if (priv->use_riwt && queue < rx_cnt) {
+		ec->rx_max_coalesced_frames = priv->rx_coal_frames[queue];
+		ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt[queue],
+							 priv);
+	} else {
+		ec->rx_max_coalesced_frames = 0;
+		ec->rx_coalesce_usecs = 0;
 	}
 
 	return 0;
 }
 
-static int stmmac_set_coalesce(struct net_device *dev,
+static int stmmac_get_coalesce(struct net_device *dev,
 			       struct ethtool_coalesce *ec)
 {
+	return __stmmac_get_coalesce(dev, ec, -1);
+}
+
+static int stmmac_get_per_queue_coalesce(struct net_device *dev, u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __stmmac_get_coalesce(dev, ec, queue);
+}
+
+static int __stmmac_set_coalesce(struct net_device *dev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
+{
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	bool all_queues = false;
 	unsigned int rx_riwt;
+	u32 max_cnt;
+	u32 rx_cnt;
+	u32 tx_cnt;
+
+	rx_cnt = priv->plat->rx_queues_to_use;
+	tx_cnt = priv->plat->tx_queues_to_use;
+	max_cnt = max(rx_cnt, tx_cnt);
+
+	if (queue < 0)
+		all_queues = true;
+	else if (queue >= max_cnt)
+		return -EINVAL;
 
 	if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) {
 		rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
@@ -785,8 +838,23 @@ static int stmmac_set_coalesce(struct net_device *dev,
 		if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
 			return -EINVAL;
 
-		priv->rx_riwt = rx_riwt;
-		stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+		if (all_queues) {
+			int i;
+
+			for (i = 0; i < rx_cnt; i++) {
+				priv->rx_riwt[i] = rx_riwt;
+				stmmac_rx_watchdog(priv, priv->ioaddr,
+						   rx_riwt, i);
+				priv->rx_coal_frames[i] =
+					ec->rx_max_coalesced_frames;
+			}
+		} else if (queue < rx_cnt) {
+			priv->rx_riwt[queue] = rx_riwt;
+			stmmac_rx_watchdog(priv, priv->ioaddr,
+					   rx_riwt, queue);
+			priv->rx_coal_frames[queue] =
+				ec->rx_max_coalesced_frames;
+		}
 	}
 
 	if ((ec->tx_coalesce_usecs == 0) &&
@@ -797,13 +865,37 @@ static int stmmac_set_coalesce(struct net_device *dev,
 	    (ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES))
 		return -EINVAL;
 
-	/* Only copy relevant parameters, ignore all others. */
-	priv->tx_coal_frames = ec->tx_max_coalesced_frames;
-	priv->tx_coal_timer = ec->tx_coalesce_usecs;
-	priv->rx_coal_frames = ec->rx_max_coalesced_frames;
+	if (all_queues) {
+		int i;
+
+		for (i = 0; i < tx_cnt; i++) {
+			priv->tx_coal_frames[i] =
+				ec->tx_max_coalesced_frames;
+			priv->tx_coal_timer[i] =
+				ec->tx_coalesce_usecs;
+		}
+	} else if (queue < tx_cnt) {
+		priv->tx_coal_frames[queue] =
+			ec->tx_max_coalesced_frames;
+		priv->tx_coal_timer[queue] =
+			ec->tx_coalesce_usecs;
+	}
+
 	return 0;
 }
 
+static int stmmac_set_coalesce(struct net_device *dev,
+			       struct ethtool_coalesce *ec)
+{
+	return __stmmac_set_coalesce(dev, ec, -1);
+}
+
+static int stmmac_set_per_queue_coalesce(struct net_device *dev, u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __stmmac_set_coalesce(dev, ec, queue);
+}
+
 static int stmmac_get_rxnfc(struct net_device *dev,
 			    struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
 {
@@ -1001,6 +1093,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.get_ts_info = stmmac_get_ts_info,
 	.get_coalesce = stmmac_get_coalesce,
 	.set_coalesce = stmmac_set_coalesce,
+	.get_per_queue_coalesce = stmmac_get_per_queue_coalesce,
+	.set_per_queue_coalesce = stmmac_set_per_queue_coalesce,
 	.get_channels = stmmac_get_channels,
 	.set_channels = stmmac_set_channels,
 	.get_tunable = stmmac_get_tunable,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index d291612eeafb..113c51bcc0b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -153,6 +153,16 @@ static void get_systime(void __iomem *ioaddr, u64 *systime)
 		*systime = ns;
 }
 
+static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
+{
+	u64 ns;
+
+	ns = readl(ptpaddr + PTP_ATNR);
+	ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC;
+
+	*ptp_time = ns;
+}
+
 const struct stmmac_hwtimestamp stmmac_ptp = {
 	.config_hw_tstamping = config_hw_tstamping,
 	.init_systime = init_systime,
@@ -160,4 +170,5 @@ const struct stmmac_hwtimestamp stmmac_ptp = {
 	.config_addend = config_addend,
 	.adjust_systime = adjust_systime,
 	.get_systime = get_systime,
+	.get_ptptime = get_ptptime,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 208cae344ffa..170296820af0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
 #include <linux/pinctrl/consumer.h>
 #ifdef CONFIG_DEBUG_FS
@@ -113,6 +114,38 @@ static void stmmac_exit_fs(struct net_device *dev);
 
 #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC))
 
+int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled)
+{
+	int ret = 0;
+
+	if (enabled) {
+		ret = clk_prepare_enable(priv->plat->stmmac_clk);
+		if (ret)
+			return ret;
+		ret = clk_prepare_enable(priv->plat->pclk);
+		if (ret) {
+			clk_disable_unprepare(priv->plat->stmmac_clk);
+			return ret;
+		}
+		if (priv->plat->clks_config) {
+			ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+			if (ret) {
+				clk_disable_unprepare(priv->plat->stmmac_clk);
+				clk_disable_unprepare(priv->plat->pclk);
+				return ret;
+			}
+		}
+	} else {
+		clk_disable_unprepare(priv->plat->stmmac_clk);
+		clk_disable_unprepare(priv->plat->pclk);
+		if (priv->plat->clks_config)
+			priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stmmac_bus_clks_config);
+
 /**
  * stmmac_verify_args - verify the driver parameters.
  * Description: it checks the driver parameters and set a default in case of
@@ -433,6 +466,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 {
 	struct skb_shared_hwtstamps shhwtstamp;
 	bool found = false;
+	s64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_tx_en)
@@ -451,6 +485,13 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 	}
 
 	if (found) {
+		/* Correct the clk domain crossing(CDC) error */
+		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+			adjust += -(2 * (NSEC_PER_SEC /
+					 priv->plat->clk_ptp_rate));
+			ns += adjust;
+		}
+
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
@@ -474,6 +515,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 {
 	struct skb_shared_hwtstamps *shhwtstamp = NULL;
 	struct dma_desc *desc = p;
+	u64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_rx_en)
@@ -485,6 +527,13 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 	/* Check if timestamp is available */
 	if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
 		stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
+
+		/* Correct the clk domain crossing(CDC) error */
+		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+			adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
+			ns -= adjust;
+		}
+
 		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
 		memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
@@ -922,6 +971,21 @@ static void stmmac_mac_an_restart(struct phylink_config *config)
 	/* Not Supported */
 }
 
+static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
+{
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+
+	if (is_up && *hs_enable) {
+		stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY);
+	} else {
+		*lo_state = FPE_EVENT_UNKNOWN;
+		*lp_state = FPE_EVENT_UNKNOWN;
+	}
+}
+
 static void stmmac_mac_link_down(struct phylink_config *config,
 				 unsigned int mode, phy_interface_t interface)
 {
@@ -932,6 +996,8 @@ static void stmmac_mac_link_down(struct phylink_config *config,
 	priv->tx_lpi_enabled = false;
 	stmmac_eee_init(priv);
 	stmmac_set_eee_pls(priv, priv->hw, false);
+
+	stmmac_fpe_link_state_handle(priv, false);
 }
 
 static void stmmac_mac_link_up(struct phylink_config *config,
@@ -1030,6 +1096,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 		priv->tx_lpi_enabled = priv->eee_enabled;
 		stmmac_set_eee_pls(priv, priv->hw, true);
 	}
+
+	stmmac_fpe_link_state_handle(priv, true);
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
@@ -1117,6 +1185,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	priv->phylink_config.dev = &priv->dev->dev;
 	priv->phylink_config.type = PHYLINK_NETDEV;
 	priv->phylink_config.pcs_poll = true;
+	priv->phylink_config.ovr_an_inband =
+		priv->plat->mdio_bus_data->xpcs_an_inband;
 
 	if (!fwnode)
 		fwnode = dev_fwnode(priv->device);
@@ -2183,7 +2253,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	/* We still have pending packets, let's call for a new scheduling */
 	if (tx_q->dirty_tx != tx_q->cur_tx)
-		hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+		hrtimer_start(&tx_q->txtimer,
+			      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
 			      HRTIMER_MODE_REL);
 
 	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
@@ -2468,7 +2539,8 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-	hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+	hrtimer_start(&tx_q->txtimer,
+		      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
 		      HRTIMER_MODE_REL);
 }
 
@@ -2509,18 +2581,21 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
 static void stmmac_init_coalesce(struct stmmac_priv *priv)
 {
 	u32 tx_channel_count = priv->plat->tx_queues_to_use;
+	u32 rx_channel_count = priv->plat->rx_queues_to_use;
 	u32 chan;
 
-	priv->tx_coal_frames = STMMAC_TX_FRAMES;
-	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	priv->rx_coal_frames = STMMAC_RX_FRAMES;
-
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 
+		priv->tx_coal_frames[chan] = STMMAC_TX_FRAMES;
+		priv->tx_coal_timer[chan] = STMMAC_COAL_TX_TIMER;
+
 		hrtimer_init(&tx_q->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		tx_q->txtimer.function = stmmac_tx_timer;
 	}
+
+	for (chan = 0; chan < rx_channel_count; chan++)
+		priv->rx_coal_frames[chan] = STMMAC_RX_FRAMES;
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2737,6 +2812,26 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 	}
 }
 
+static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
+{
+	char *name;
+
+	clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+
+	name = priv->wq_name;
+	sprintf(name, "%s-fpe", priv->dev->name);
+
+	priv->fpe_wq = create_singlethread_workqueue(name);
+	if (!priv->fpe_wq) {
+		netdev_err(priv->dev, "%s: Failed to create workqueue\n", name);
+
+		return -ENOMEM;
+	}
+	netdev_info(priv->dev, "FPE workqueue start");
+
+	return 0;
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -2825,10 +2920,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 		priv->tx_lpi_timer = eee_timer * 1000;
 
 	if (priv->use_riwt) {
-		if (!priv->rx_riwt)
-			priv->rx_riwt = DEF_DMA_RIWT;
+		u32 queue;
+
+		for (queue = 0; queue < rx_cnt; queue++) {
+			if (!priv->rx_riwt[queue])
+				priv->rx_riwt[queue] = DEF_DMA_RIWT;
 
-		ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+			stmmac_rx_watchdog(priv, priv->ioaddr,
+					   priv->rx_riwt[queue], queue);
+		}
 	}
 
 	if (priv->hw->pcs)
@@ -2868,6 +2968,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	/* Start the ball rolling... */
 	stmmac_start_all_dma(priv);
 
+	if (priv->dma_cap.fpesel) {
+		stmmac_fpe_start_wq(priv);
+
+		if (priv->plat->fpe_cfg->enable)
+			stmmac_fpe_handshake(priv, true);
+	}
+
 	return 0;
 }
 
@@ -2894,15 +3001,21 @@ static int stmmac_open(struct net_device *dev)
 	u32 chan;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
-	    priv->hw->xpcs == NULL) {
+	    priv->hw->xpcs_args.an_mode != DW_AN_C73) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			netdev_err(priv->dev,
 				   "%s: Cannot attach to PHY (error: %d)\n",
 				   __func__, ret);
-			return ret;
+			goto init_phy_error;
 		}
 	}
 
@@ -3018,9 +3131,21 @@ init_error:
 	free_dma_desc_resources(priv);
 dma_desc_error:
 	phylink_disconnect_phy(priv->phylink);
+init_phy_error:
+	pm_runtime_put(priv->device);
 	return ret;
 }
 
+static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+{
+	set_bit(__FPE_REMOVING, &priv->fpe_task_state);
+
+	if (priv->fpe_wq)
+		destroy_workqueue(priv->fpe_wq);
+
+	netdev_info(priv->dev, "FPE workqueue stop");
+}
+
 /**
  *  stmmac_release - close entry point of the driver
  *  @dev : device pointer.
@@ -3068,6 +3193,11 @@ static int stmmac_release(struct net_device *dev)
 
 	stmmac_release_ptp(priv);
 
+	pm_runtime_put(priv->device);
+
+	if (priv->dma_cap.fpesel)
+		stmmac_fpe_stop_wq(priv);
+
 	return 0;
 }
 
@@ -3317,11 +3447,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
 		set_ic = true;
-	else if (!priv->tx_coal_frames)
+	else if (!priv->tx_coal_frames[queue])
 		set_ic = false;
-	else if (tx_packets > priv->tx_coal_frames)
+	else if (tx_packets > priv->tx_coal_frames[queue])
 		set_ic = true;
-	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+	else if ((tx_q->tx_count_frames %
+		  priv->tx_coal_frames[queue]) < tx_packets)
 		set_ic = true;
 	else
 		set_ic = false;
@@ -3546,11 +3677,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
 		set_ic = true;
-	else if (!priv->tx_coal_frames)
+	else if (!priv->tx_coal_frames[queue])
 		set_ic = false;
-	else if (tx_packets > priv->tx_coal_frames)
+	else if (tx_packets > priv->tx_coal_frames[queue])
 		set_ic = true;
-	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+	else if ((tx_q->tx_count_frames %
+		  priv->tx_coal_frames[queue]) < tx_packets)
 		set_ic = true;
 	else
 		set_ic = false;
@@ -3749,11 +3881,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 		stmmac_refill_desc3(priv, rx_q, p);
 
 		rx_q->rx_count_frames++;
-		rx_q->rx_count_frames += priv->rx_coal_frames;
-		if (rx_q->rx_count_frames > priv->rx_coal_frames)
+		rx_q->rx_count_frames += priv->rx_coal_frames[queue];
+		if (rx_q->rx_count_frames > priv->rx_coal_frames[queue])
 			rx_q->rx_count_frames = 0;
 
-		use_rx_wd = !priv->rx_coal_frames;
+		use_rx_wd = !priv->rx_coal_frames[queue];
 		use_rx_wd |= rx_q->rx_count_frames > 0;
 		if (!priv->use_riwt)
 			use_rx_wd = false;
@@ -4207,6 +4339,48 @@ static int stmmac_set_features(struct net_device *netdev,
 	return 0;
 }
 
+static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status)
+{
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+
+	if (status == FPE_EVENT_UNKNOWN || !*hs_enable)
+		return;
+
+	/* If LP has sent verify mPacket, LP is FPE capable */
+	if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) {
+		if (*lp_state < FPE_STATE_CAPABLE)
+			*lp_state = FPE_STATE_CAPABLE;
+
+		/* If user has requested FPE enable, quickly response */
+		if (*hs_enable)
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_RESPONSE);
+	}
+
+	/* If Local has sent verify mPacket, Local is FPE capable */
+	if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) {
+		if (*lo_state < FPE_STATE_CAPABLE)
+			*lo_state = FPE_STATE_CAPABLE;
+	}
+
+	/* If LP has sent response mPacket, LP is entering FPE ON */
+	if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP)
+		*lp_state = FPE_STATE_ENTERING_ON;
+
+	/* If Local has sent response mPacket, Local is entering FPE ON */
+	if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP)
+		*lo_state = FPE_STATE_ENTERING_ON;
+
+	if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) &&
+	    !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) &&
+	    priv->fpe_wq) {
+		queue_work(priv->fpe_wq, &priv->fpe_task);
+	}
+}
+
 /**
  *  stmmac_interrupt - main ISR
  *  @irq: interrupt number.
@@ -4241,6 +4415,17 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	if (stmmac_safety_feat_interrupt(priv))
 		return IRQ_HANDLED;
 
+	if (priv->dma_cap.estsel)
+		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev,
+				      &priv->xstats, tx_cnt);
+
+	if (priv->dma_cap.fpesel) {
+		int status = stmmac_fpe_irq_status(priv, priv->ioaddr,
+						   priv->dev);
+
+		stmmac_fpe_event_status(priv, status);
+	}
+
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || xmac) {
 		int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
@@ -4704,6 +4889,12 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
 	bool is_double = false;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -4737,10 +4928,15 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
 	if (priv->hw->num_vlan) {
 		ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
 		if (ret)
-			return ret;
+			goto del_vlan_error;
 	}
 
-	return stmmac_vlan_update(priv, is_double);
+	ret = stmmac_vlan_update(priv, is_double);
+
+del_vlan_error:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 static const struct net_device_ops stmmac_netdev_ops = {
@@ -4977,6 +5173,68 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size)
 	return ret;
 }
 
+#define SEND_VERIFY_MPAKCET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n"
+static void stmmac_fpe_lp_task(struct work_struct *work)
+{
+	struct stmmac_priv *priv = container_of(work, struct stmmac_priv,
+						fpe_task);
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+	bool *enable = &fpe_cfg->enable;
+	int retries = 20;
+
+	while (retries-- > 0) {
+		/* Bail out immediately if FPE handshake is OFF */
+		if (*lo_state == FPE_STATE_OFF || !*hs_enable)
+			break;
+
+		if (*lo_state == FPE_STATE_ENTERING_ON &&
+		    *lp_state == FPE_STATE_ENTERING_ON) {
+			stmmac_fpe_configure(priv, priv->ioaddr,
+					     priv->plat->tx_queues_to_use,
+					     priv->plat->rx_queues_to_use,
+					     *enable);
+
+			netdev_info(priv->dev, "configured FPE\n");
+
+			*lo_state = FPE_STATE_ON;
+			*lp_state = FPE_STATE_ON;
+			netdev_info(priv->dev, "!!! BOTH FPE stations ON\n");
+			break;
+		}
+
+		if ((*lo_state == FPE_STATE_CAPABLE ||
+		     *lo_state == FPE_STATE_ENTERING_ON) &&
+		     *lp_state != FPE_STATE_ON) {
+			netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT,
+				    *lo_state, *lp_state);
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_VERIFY);
+		}
+		/* Sleep then retry */
+		msleep(500);
+	}
+
+	clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+}
+
+void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable)
+{
+	if (priv->plat->fpe_cfg->hs_enable != enable) {
+		if (enable) {
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_VERIFY);
+		} else {
+			priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF;
+			priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF;
+		}
+
+		priv->plat->fpe_cfg->hs_enable = enable;
+	}
+}
+
 /**
  * stmmac_dvr_probe
  * @device: device pointer
@@ -5034,6 +5292,9 @@ int stmmac_dvr_probe(struct device *device,
 
 	INIT_WORK(&priv->service_task, stmmac_service_task);
 
+	/* Initialize Link Partner FPE workqueue */
+	INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task);
+
 	/* Override with kernel parameters if supplied XXX CRS XXX
 	 * this needs to have multiple instances
 	 */
@@ -5179,6 +5440,10 @@ int stmmac_dvr_probe(struct device *device,
 
 	stmmac_check_pcs_mode(priv);
 
+	pm_runtime_get_noresume(device);
+	pm_runtime_set_active(device);
+	pm_runtime_enable(device);
+
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI) {
 		/* MDIO bus Registration */
@@ -5216,6 +5481,11 @@ int stmmac_dvr_probe(struct device *device,
 	stmmac_init_fs(ndev);
 #endif
 
+	/* Let pm_runtime_put() disable the clocks.
+	 * If CONFIG_PM is not enabled, the clocks will stay powered.
+	 */
+	pm_runtime_put(device);
+
 	return ret;
 
 error_serdes_powerup:
@@ -5230,6 +5500,7 @@ error_mdio_register:
 	stmmac_napi_del(ndev);
 error_hw_init:
 	destroy_workqueue(priv->wq);
+	stmmac_bus_clks_config(priv, false);
 
 	return ret;
 }
@@ -5265,8 +5536,8 @@ int stmmac_dvr_remove(struct device *dev)
 	phylink_destroy(priv->phylink);
 	if (priv->plat->stmmac_rst)
 		reset_control_assert(priv->plat->stmmac_rst);
-	clk_disable_unprepare(priv->plat->pclk);
-	clk_disable_unprepare(priv->plat->stmmac_clk);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
@@ -5289,6 +5560,7 @@ int stmmac_suspend(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	u32 chan;
+	int ret;
 
 	if (!ndev || !netif_running(ndev))
 		return 0;
@@ -5332,11 +5604,22 @@ int stmmac_suspend(struct device *dev)
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
 		clk_disable_unprepare(priv->plat->clk_ptp_ref);
-		clk_disable_unprepare(priv->plat->pclk);
-		clk_disable_unprepare(priv->plat->stmmac_clk);
+		ret = pm_runtime_force_suspend(dev);
+		if (ret)
+			return ret;
 	}
+
 	mutex_unlock(&priv->lock);
 
+	if (priv->dma_cap.fpesel) {
+		/* Disable FPE */
+		stmmac_fpe_configure(priv, priv->ioaddr,
+				     priv->plat->tx_queues_to_use,
+				     priv->plat->rx_queues_to_use, false);
+
+		stmmac_fpe_handshake(priv, false);
+	}
+
 	priv->speed = SPEED_UNKNOWN;
 	return 0;
 }
@@ -5399,8 +5682,9 @@ int stmmac_resume(struct device *dev)
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
 		/* enable the clk previously disabled */
-		clk_prepare_enable(priv->plat->stmmac_clk);
-		clk_prepare_enable(priv->plat->pclk);
+		ret = pm_runtime_force_resume(dev);
+		if (ret)
+			return ret;
 		if (priv->plat->clk_ptp_ref)
 			clk_prepare_enable(priv->plat->clk_ptp_ref);
 		/* reset the phy so that it's ready */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index d64116e0543e..b750074f8f9c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -15,6 +15,7 @@
 #include <linux/iopoll.h>
 #include <linux/mii.h>
 #include <linux/of_mdio.h>
+#include <linux/pm_runtime.h>
 #include <linux/phy.h>
 #include <linux/property.h>
 #include <linux/slab.h>
@@ -87,21 +88,29 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	u32 tmp, addr, value = MII_XGMAC_BUSY;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	if (phyreg & MII_ADDR_C45) {
 		phyreg &= ~MII_ADDR_C45;
 
 		ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 	} else {
 		ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 
 		value |= MII_XGMAC_SADDR;
 	}
@@ -112,8 +121,10 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to read */
 	writel(addr, priv->ioaddr + mii_address);
@@ -121,11 +132,18 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Read the data from the MII data register */
-	return readl(priv->ioaddr + mii_data) & GENMASK(15, 0);
+	ret = (int)readl(priv->ioaddr + mii_data) & GENMASK(15, 0);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
@@ -138,21 +156,29 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
 	u32 addr, tmp, value = MII_XGMAC_BUSY;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	if (phyreg & MII_ADDR_C45) {
 		phyreg &= ~MII_ADDR_C45;
 
 		ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 	} else {
 		ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 
 		value |= MII_XGMAC_SADDR;
 	}
@@ -164,16 +190,23 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to write */
 	writel(addr, priv->ioaddr + mii_address);
 	writel(value, priv->ioaddr + mii_data);
 
 	/* Wait until any existing MII operation is complete */
-	return readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-				  !(tmp & MII_XGMAC_BUSY), 100, 10000);
+	ret = readl_poll_timeout(priv->ioaddr + mii_data, tmp,
+				 !(tmp & MII_XGMAC_BUSY), 100, 10000);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 /**
@@ -196,6 +229,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	int data = 0;
 	u32 v;
 
+	data = pm_runtime_get_sync(priv->device);
+	if (data < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return data;
+	}
+
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
@@ -216,19 +255,26 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	}
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		data = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		data = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Read the data from the MII data register */
 	data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK;
 
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
 	return data;
 }
 
@@ -247,10 +293,16 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
 	unsigned int mii_data = priv->hw->mii.data;
+	int ret, data = phydata;
 	u32 value = MII_BUSY;
-	int data = phydata;
 	u32 v;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
@@ -275,16 +327,23 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to write */
 	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	/* Wait until any existing MII operation is complete */
-	return readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-				  100, 10000);
+	ret = readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
+				 100, 10000);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 /**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 6dc9f10414e4..5a1e018884e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -704,7 +704,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev)
 }
 EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
 
-#ifdef CONFIG_PM_SLEEP
 /**
  * stmmac_pltfr_suspend
  * @dev: device pointer
@@ -712,7 +711,7 @@ EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
  * call the main suspend function and then, if required, on some platform, it
  * can call an exit helper.
  */
-static int stmmac_pltfr_suspend(struct device *dev)
+static int __maybe_unused stmmac_pltfr_suspend(struct device *dev)
 {
 	int ret;
 	struct net_device *ndev = dev_get_drvdata(dev);
@@ -733,7 +732,7 @@ static int stmmac_pltfr_suspend(struct device *dev)
  * the main resume function, on some platforms, it can call own init helper
  * if required.
  */
-static int stmmac_pltfr_resume(struct device *dev)
+static int __maybe_unused stmmac_pltfr_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -744,10 +743,29 @@ static int stmmac_pltfr_resume(struct device *dev)
 
 	return stmmac_resume(dev);
 }
-#endif /* CONFIG_PM_SLEEP */
 
-SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend,
-				       stmmac_pltfr_resume);
+static int __maybe_unused stmmac_runtime_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	stmmac_bus_clks_config(priv, false);
+
+	return 0;
+}
+
+static int __maybe_unused stmmac_runtime_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	return stmmac_bus_clks_config(priv, true);
+}
+
+const struct dev_pm_ops stmmac_pltfr_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume)
+	SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL)
+};
 EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops);
 
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 0989e2bb6ee3..b164ae22e35f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -9,6 +9,7 @@
 *******************************************************************************/
 #include "stmmac.h"
 #include "stmmac_ptp.h"
+#include "dwmac4.h"
 
 /**
  * stmmac_adjust_freq
@@ -165,6 +166,36 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 	return ret;
 }
 
+/**
+ * stmmac_get_syncdevicetime
+ * @device: current device time
+ * @system: system counter value read synchronously with device time
+ * @ctx: context provided by timekeeping code
+ * Description: Read device and system clock simultaneously and return the
+ * corrected clock values in ns.
+ **/
+static int stmmac_get_syncdevicetime(ktime_t *device,
+				     struct system_counterval_t *system,
+				     void *ctx)
+{
+	struct stmmac_priv *priv = (struct stmmac_priv *)ctx;
+
+	if (priv->plat->crosststamp)
+		return priv->plat->crosststamp(device, system, ctx);
+	else
+		return -EOPNOTSUPP;
+}
+
+static int stmmac_getcrosststamp(struct ptp_clock_info *ptp,
+				 struct system_device_crosststamp *xtstamp)
+{
+	struct stmmac_priv *priv =
+		container_of(ptp, struct stmmac_priv, ptp_clock_ops);
+
+	return get_device_system_crosststamp(stmmac_get_syncdevicetime,
+					     priv, NULL, xtstamp);
+}
+
 /* structure describing a PTP hardware clock */
 static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.owner = THIS_MODULE,
@@ -180,6 +211,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.gettime64 = stmmac_get_time,
 	.settime64 = stmmac_set_time,
 	.enable = stmmac_enable,
+	.getcrosststamp = stmmac_getcrosststamp,
 };
 
 /**
@@ -192,6 +224,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
 {
 	int i;
 
+	if (priv->plat->ptp_clk_freq_config)
+		priv->plat->ptp_clk_freq_config(priv);
+
 	for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
 		if (i >= STMMAC_PPS_MAX)
 			break;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index 7abb1d47e7da..f88727ce4d30 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -23,6 +23,9 @@
 #define	PTP_STSUR	0x10	/* System Time – Seconds Update Reg */
 #define	PTP_STNSUR	0x14	/* System Time – Nanoseconds Update Reg */
 #define	PTP_TAR		0x18	/* Timestamp Addend Reg */
+#define	PTP_ACR		0x40	/* Auxiliary Control Reg */
+#define	PTP_ATNR	0x48	/* Auxiliary Timestamp - Nanoseconds Reg */
+#define	PTP_ATSR	0x4c	/* Auxiliary Timestamp - Seconds Reg */
 
 #define	PTP_STNSUR_ADDSUB_SHIFT	31
 #define	PTP_DIGITAL_ROLLOVER_MODE	0x3B9ACA00	/* 10e9-1 ns */
@@ -64,4 +67,24 @@
 #define	PTP_SSIR_SSINC_MASK		0xff
 #define	GMAC4_PTP_SSIR_SSINC_SHIFT	16
 
+/* Auxiliary Control defines */
+#define	PTP_ACR_ATSFC		BIT(0)	/* Auxiliary Snapshot FIFO Clear */
+#define	PTP_ACR_ATSEN0		BIT(4)	/* Auxiliary Snapshot 0 Enable */
+#define	PTP_ACR_ATSEN1		BIT(5)	/* Auxiliary Snapshot 1 Enable */
+#define	PTP_ACR_ATSEN2		BIT(6)	/* Auxiliary Snapshot 2 Enable */
+#define	PTP_ACR_ATSEN3		BIT(7)	/* Auxiliary Snapshot 3 Enable */
+#define	PTP_ACR_MASK		GENMASK(7, 4)	/* Aux Snapshot Mask */
+#define	PMC_ART_VALUE0		0x01	/* PMC_ART[15:0] timer value */
+#define	PMC_ART_VALUE1		0x02	/* PMC_ART[31:16] timer value */
+#define	PMC_ART_VALUE2		0x03	/* PMC_ART[47:32] timer value */
+#define	PMC_ART_VALUE3		0x04	/* PMC_ART[63:48] timer value */
+#define	GMAC4_ART_TIME_SHIFT	16	/* ART TIME 16-bits shift */
+
+enum aux_snapshot {
+	AUX_SNAPSHOT0 = 0x10,
+	AUX_SNAPSHOT1 = 0x20,
+	AUX_SNAPSHOT2 = 0x40,
+	AUX_SNAPSHOT3 = 0x80,
+};
+
 #endif	/* __STMMAC_PTP_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 44bb133c3000..1d84ee359808 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -297,6 +297,17 @@ static int tc_init(struct stmmac_priv *priv)
 
 	dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n",
 			priv->tc_entries_max, priv->tc_off_max);
+
+	if (!priv->plat->fpe_cfg) {
+		priv->plat->fpe_cfg = devm_kzalloc(priv->device,
+						   sizeof(*priv->plat->fpe_cfg),
+						   GFP_KERNEL);
+		if (!priv->plat->fpe_cfg)
+			return -ENOMEM;
+	} else {
+		memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg));
+	}
+
 	return 0;
 }
 
@@ -598,6 +609,87 @@ static int tc_del_flow(struct stmmac_priv *priv,
 	return ret;
 }
 
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int tc_add_vlan_flow(struct stmmac_priv *priv,
+			    struct flow_cls_offload *cls)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
+	struct flow_match_vlan match;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+		return -EINVAL;
+
+	if (tc < 0) {
+		netdev_err(priv->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	flow_rule_match_vlan(rule, &match);
+
+	if (match.mask->vlan_priority) {
+		u32 prio;
+
+		if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+			netdev_err(priv->dev, "Only full mask is supported for VLAN priority");
+			return -EINVAL;
+		}
+
+		prio = BIT(match.key->vlan_priority);
+		stmmac_rx_queue_prio(priv, priv->hw, prio, tc);
+	}
+
+	return 0;
+}
+
+static int tc_del_vlan_flow(struct stmmac_priv *priv,
+			    struct flow_cls_offload *cls)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+		return -EINVAL;
+
+	if (tc < 0) {
+		netdev_err(priv->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	stmmac_rx_queue_prio(priv, priv->hw, 0, tc);
+
+	return 0;
+}
+
+static int tc_add_flow_cls(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls)
+{
+	int ret;
+
+	ret = tc_add_flow(priv, cls);
+	if (!ret)
+		return ret;
+
+	return tc_add_vlan_flow(priv, cls);
+}
+
+static int tc_del_flow_cls(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls)
+{
+	int ret;
+
+	ret = tc_del_flow(priv, cls);
+	if (!ret)
+		return ret;
+
+	return tc_del_vlan_flow(priv, cls);
+}
+
 static int tc_setup_cls(struct stmmac_priv *priv,
 			struct flow_cls_offload *cls)
 {
@@ -609,10 +701,10 @@ static int tc_setup_cls(struct stmmac_priv *priv,
 
 	switch (cls->command) {
 	case FLOW_CLS_REPLACE:
-		ret = tc_add_flow(priv, cls);
+		ret = tc_add_flow_cls(priv, cls);
 		break;
 	case FLOW_CLS_DESTROY:
-		ret = tc_del_flow(priv, cls);
+		ret = tc_del_flow_cls(priv, cls);
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -748,13 +840,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
 	if (fpe && !priv->dma_cap.fpesel)
 		return -EOPNOTSUPP;
 
-	ret = stmmac_fpe_configure(priv, priv->ioaddr,
-				   priv->plat->tx_queues_to_use,
-				   priv->plat->rx_queues_to_use, fpe);
-	if (ret && fpe) {
-		netdev_err(priv->dev, "failed to enable Frame Preemption\n");
-		return ret;
-	}
+	/* Actual FPE register configuration will be done after FPE handshake
+	 * is success.
+	 */
+	priv->plat->fpe_cfg->enable = fpe;
 
 	ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
 				   priv->plat->clk_ptp_rate);
@@ -764,12 +853,29 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
 	}
 
 	netdev_info(priv->dev, "configured EST\n");
+
+	if (fpe) {
+		stmmac_fpe_handshake(priv, true);
+		netdev_info(priv->dev, "start FPE handshake\n");
+	}
+
 	return 0;
 
 disable:
 	priv->plat->est->enable = false;
 	stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
 			     priv->plat->clk_ptp_rate);
+
+	priv->plat->fpe_cfg->enable = false;
+	stmmac_fpe_configure(priv, priv->ioaddr,
+			     priv->plat->tx_queues_to_use,
+			     priv->plat->rx_queues_to_use,
+			     false);
+	netdev_info(priv->dev, "disabled FPE\n");
+
+	stmmac_fpe_handshake(priv, false);
+	netdev_info(priv->dev, "stop FPE handshake\n");
+
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 9ff894ba8d3e..54f45d8c79a7 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1599,6 +1599,7 @@ static inline int cas_mdio_link_not_up(struct cas *cp)
 			cas_phy_write(cp, MII_BMCR, val);
 			break;
 		}
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 58f142ee78a3..9790656cf970 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1674,8 +1674,8 @@ static void gem_init_phy(struct gem *gp)
 	if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE) {
 		int i;
 
-		/* Those delay sucks, the HW seem to love them though, I'll
-		 * serisouly consider breaking some locks here to be able
+		/* Those delays sucks, the HW seems to love them though, I'll
+		 * seriously consider breaking some locks here to be able
 		 * to schedule instead
 		 */
 		for (i = 0; i < 3; i++) {
diff --git a/drivers/net/fddi/Kconfig b/drivers/net/fddi/Kconfig
index f722079dfb6a..846bf41c2717 100644
--- a/drivers/net/fddi/Kconfig
+++ b/drivers/net/fddi/Kconfig
@@ -38,22 +38,6 @@ config DEFXX
 	  To compile this driver as a module, choose M here: the module
 	  will be called defxx.  If unsure, say N.
 
-config DEFXX_MMIO
-	bool
-	prompt "Use MMIO instead of PIO" if PCI || EISA
-	depends on DEFXX
-	default n if PCI || EISA
-	default y
-	help
-	  This instructs the driver to use EISA or PCI memory-mapped I/O
-	  (MMIO) as appropriate instead of programmed I/O ports (PIO).
-	  Enabling this gives an improvement in processing time in parts
-	  of the driver, but it may cause problems with EISA (DEFEA)
-	  adapters.  TURBOchannel does not have the concept of I/O ports,
-	  so MMIO is always used for these (DEFTA) adapters.
-
-	  If unsure, say N.
-
 config SKFP
 	tristate "SysKonnect FDDI PCI support"
 	depends on FDDI && PCI
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 077c68498f04..6d1e3f49a3d3 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -21,7 +21,7 @@
  *   LVS	Lawrence V. Stefani <lstefani@yahoo.com>
  *
  * Maintainers:
- *   macro	Maciej W. Rozycki <macro@linux-mips.org>
+ *   macro	Maciej W. Rozycki <macro@orcam.me.uk>
  *
  * Credits:
  *   I'd like to thank Patricia Cross for helping me get started with
@@ -197,6 +197,7 @@
  *		23 Oct 2006	macro		Big-endian host support.
  *		14 Dec 2006	macro		TURBOchannel support.
  *		01 Jul 2014	macro		Fixes for DMA on 64-bit hosts.
+ *		10 Mar 2021	macro		Dynamic MMIO vs port I/O.
  */
 
 /* Include files */
@@ -225,8 +226,8 @@
 
 /* Version information string should be updated prior to each new release!  */
 #define DRV_NAME "defxx"
-#define DRV_VERSION "v1.11"
-#define DRV_RELDATE "2014/07/01"
+#define DRV_VERSION "v1.12"
+#define DRV_RELDATE "2021/03/10"
 
 static const char version[] =
 	DRV_NAME ": " DRV_VERSION " " DRV_RELDATE
@@ -253,10 +254,10 @@ static const char version[] =
 #define DFX_BUS_TC(dev) 0
 #endif
 
-#ifdef CONFIG_DEFXX_MMIO
-#define DFX_MMIO 1
+#if defined(CONFIG_EISA) || defined(CONFIG_PCI)
+#define dfx_use_mmio bp->mmio
 #else
-#define DFX_MMIO 0
+#define dfx_use_mmio true
 #endif
 
 /* Define module-wide (static) routines */
@@ -374,8 +375,6 @@ static inline void dfx_outl(DFX_board_t *bp, int offset, u32 data)
 static void dfx_port_write_long(DFX_board_t *bp, int offset, u32 data)
 {
 	struct device __maybe_unused *bdev = bp->bus_dev;
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_use_mmio)
 		dfx_writel(bp, offset, data);
@@ -398,8 +397,6 @@ static inline void dfx_inl(DFX_board_t *bp, int offset, u32 *data)
 static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
 {
 	struct device __maybe_unused *bdev = bp->bus_dev;
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_use_mmio)
 		dfx_readl(bp, offset, data);
@@ -421,7 +418,7 @@ static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
  *   None
  *
  * Arguments:
- *   bdev	- pointer to device information
+ *   bp		- pointer to board information
  *   bar_start	- pointer to store the start addresses
  *   bar_len	- pointer to store the lengths of the areas
  *
@@ -431,13 +428,13 @@ static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
  * Side Effects:
  *   None
  */
-static void dfx_get_bars(struct device *bdev,
+static void dfx_get_bars(DFX_board_t *bp,
 			 resource_size_t *bar_start, resource_size_t *bar_len)
 {
+	struct device *bdev = bp->bus_dev;
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_bus_pci) {
 		int num = dfx_use_mmio ? 0 : 1;
@@ -495,6 +492,13 @@ static const struct net_device_ops dfx_netdev_ops = {
 	.ndo_set_mac_address	= dfx_ctl_set_mac_address,
 };
 
+static void dfx_register_res_err(const char *print_name, bool mmio,
+				 unsigned long start, unsigned long len)
+{
+	pr_err("%s: Cannot reserve %s resource 0x%lx @ 0x%lx, aborting\n",
+	       print_name, mmio ? "MMIO" : "I/O", len, start);
+}
+
 /*
  * ================
  * = dfx_register =
@@ -528,8 +532,6 @@ static int dfx_register(struct device *bdev)
 	static int version_disp;
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	const char *print_name = dev_name(bdev);
 	struct net_device *dev;
 	DFX_board_t	  *bp;			/* board pointer */
@@ -567,46 +569,48 @@ static int dfx_register(struct device *bdev)
 	bp->bus_dev = bdev;
 	dev_set_drvdata(bdev, dev);
 
-	dfx_get_bars(bdev, bar_start, bar_len);
-	if (dfx_bus_eisa && dfx_use_mmio && bar_start[0] == 0) {
-		pr_err("%s: Cannot use MMIO, no address set, aborting\n",
-		       print_name);
-		pr_err("%s: Run ECU and set adapter's MMIO location\n",
-		       print_name);
-		pr_err("%s: Or recompile driver with \"CONFIG_DEFXX_MMIO=n\""
-		       "\n", print_name);
-		err = -ENXIO;
-		goto err_out;
+	bp->mmio = true;
+
+	dfx_get_bars(bp, bar_start, bar_len);
+	if (bar_len[0] == 0 ||
+	    (dfx_bus_eisa && dfx_use_mmio && bar_start[0] == 0)) {
+		bp->mmio = false;
+		dfx_get_bars(bp, bar_start, bar_len);
 	}
 
-	if (dfx_use_mmio)
+	if (dfx_use_mmio) {
 		region = request_mem_region(bar_start[0], bar_len[0],
-					    print_name);
-	else
-		region = request_region(bar_start[0], bar_len[0], print_name);
+					    bdev->driver->name);
+		if (!region && (dfx_bus_eisa || dfx_bus_pci)) {
+			bp->mmio = false;
+			dfx_get_bars(bp, bar_start, bar_len);
+		}
+	}
+	if (!dfx_use_mmio)
+		region = request_region(bar_start[0], bar_len[0],
+					bdev->driver->name);
 	if (!region) {
-		pr_err("%s: Cannot reserve %s resource 0x%lx @ 0x%lx, "
-		       "aborting\n", dfx_use_mmio ? "MMIO" : "I/O", print_name,
-		       (long)bar_len[0], (long)bar_start[0]);
+		dfx_register_res_err(print_name, dfx_use_mmio,
+				     bar_start[0], bar_len[0]);
 		err = -EBUSY;
 		goto err_out_disable;
 	}
 	if (bar_start[1] != 0) {
-		region = request_region(bar_start[1], bar_len[1], print_name);
+		region = request_region(bar_start[1], bar_len[1],
+					bdev->driver->name);
 		if (!region) {
-			pr_err("%s: Cannot reserve I/O resource "
-			       "0x%lx @ 0x%lx, aborting\n", print_name,
-			       (long)bar_len[1], (long)bar_start[1]);
+			dfx_register_res_err(print_name, 0,
+					     bar_start[1], bar_len[1]);
 			err = -EBUSY;
 			goto err_out_csr_region;
 		}
 	}
 	if (bar_start[2] != 0) {
-		region = request_region(bar_start[2], bar_len[2], print_name);
+		region = request_region(bar_start[2], bar_len[2],
+					bdev->driver->name);
 		if (!region) {
-			pr_err("%s: Cannot reserve I/O resource "
-			       "0x%lx @ 0x%lx, aborting\n", print_name,
-			       (long)bar_len[2], (long)bar_start[2]);
+			dfx_register_res_err(print_name, 0,
+					     bar_start[2], bar_len[2]);
 			err = -EBUSY;
 			goto err_out_bh_region;
 		}
@@ -721,7 +725,6 @@ static void dfx_bus_init(struct net_device *dev)
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	u8 val;
 
 	DBG_printk("In dfx_bus_init...\n");
@@ -1041,7 +1044,6 @@ static int dfx_driver_init(struct net_device *dev, const char *print_name,
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	int alloc_size;			/* total buffer size needed */
 	char *top_v, *curr_v;		/* virtual addrs into memory block */
 	dma_addr_t top_p, curr_p;	/* physical addrs into memory block */
@@ -3695,8 +3697,6 @@ static void dfx_unregister(struct device *bdev)
 	struct net_device *dev = dev_get_drvdata(bdev);
 	DFX_board_t *bp = netdev_priv(dev);
 	int dfx_bus_pci = dev_is_pci(bdev);
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	resource_size_t bar_start[3] = {0};	/* pointers to ports */
 	resource_size_t bar_len[3] = {0};	/* resource lengths */
 	int		alloc_size;		/* total buffer size used */
@@ -3716,7 +3716,7 @@ static void dfx_unregister(struct device *bdev)
 
 	dfx_bus_uninit(dev);
 
-	dfx_get_bars(bdev, bar_start, bar_len);
+	dfx_get_bars(bp, bar_start, bar_len);
 	if (bar_start[2] != 0)
 		release_region(bar_start[2], bar_len[2]);
 	if (bar_start[1] != 0)
@@ -3748,7 +3748,7 @@ static const struct pci_device_id dfx_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, dfx_pci_table);
 
 static struct pci_driver dfx_pci_driver = {
-	.name		= "defxx",
+	.name		= DRV_NAME,
 	.id_table	= dfx_pci_table,
 	.probe		= dfx_pci_register,
 	.remove		= dfx_pci_unregister,
@@ -3779,7 +3779,7 @@ MODULE_DEVICE_TABLE(eisa, dfx_eisa_table);
 static struct eisa_driver dfx_eisa_driver = {
 	.id_table	= dfx_eisa_table,
 	.driver		= {
-		.name	= "defxx",
+		.name	= DRV_NAME,
 		.bus	= &eisa_bus_type,
 		.probe	= dfx_dev_register,
 		.remove	= dfx_dev_unregister,
@@ -3800,7 +3800,7 @@ MODULE_DEVICE_TABLE(tc, dfx_tc_table);
 static struct tc_driver dfx_tc_driver = {
 	.id_table	= dfx_tc_table,
 	.driver		= {
-		.name	= "defxx",
+		.name	= DRV_NAME,
 		.bus	= &tc_bus_type,
 		.probe	= dfx_dev_register,
 		.remove	= dfx_dev_unregister,
diff --git a/drivers/net/fddi/defxx.h b/drivers/net/fddi/defxx.h
index 9d30fde2ef3c..8193713e12db 100644
--- a/drivers/net/fddi/defxx.h
+++ b/drivers/net/fddi/defxx.h
@@ -16,7 +16,7 @@
  *   LVS	Lawrence V. Stefani <lstefani@yahoo.com>
  *
  * Maintainers:
- *   macro	Maciej W. Rozycki <macro@linux-mips.org>
+ *   macro	Maciej W. Rozycki <macro@orcam.me.uk>
  *
  * Modification History:
  *		Date		Name	Description
@@ -27,6 +27,7 @@
  *		04 Aug 2003	macro		Converted to the DMA API.
  *		23 Oct 2006	macro		Big-endian host support.
  *		14 Dec 2006	macro		TURBOchannel support.
+ *		10 Mar 2021	macro		Dynamic MMIO vs port I/O.
  */
 
 #ifndef _DEFXX_H_
@@ -1776,6 +1777,8 @@ typedef struct DFX_board_tag
 		int port;
 	} base;										/* base address */
 	struct device			*bus_dev;
+	/* Whether to use MMIO or port I/O.  */
+	bool				mmio;
 	u32				full_duplex_enb;				/* FDDI Full Duplex enable (1 == on, 2 == off) */
 	u32				req_ttrt;					/* requested TTRT value (in 80ns units) */
 	u32				burst_size;					/* adapter burst size (enumerated) */
diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index eaf85db53a5e..14f07050b6b1 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -60,7 +60,7 @@
 static const char version[] =
 	DRV_NAME ": " DRV_VERSION "  " DRV_RELDATE "  Maciej W. Rozycki\n";
 
-MODULE_AUTHOR("Maciej W. Rozycki <macro@linux-mips.org>");
+MODULE_AUTHOR("Maciej W. Rozycki <macro@orcam.me.uk>");
 MODULE_DESCRIPTION("DEC FDDIcontroller 700 (DEFZA-xx) driver");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/net/fddi/skfp/h/smt.h b/drivers/net/fddi/skfp/h/smt.h
index a0dbc0f57a55..b19c7a99d32a 100644
--- a/drivers/net/fddi/skfp/h/smt.h
+++ b/drivers/net/fddi/skfp/h/smt.h
@@ -411,7 +411,7 @@ struct smt_p_reason {
 #define SMT_RDF_ILLEGAL 0x00000005	/* read only (PMF) */
 #define SMT_RDF_NOPARAM	0x6		/* parameter not supported (PMF) */
 #define SMT_RDF_RANGE	0x8		/* out of range */
-#define SMT_RDF_AUTHOR	0x9		/* not autohorized */
+#define SMT_RDF_AUTHOR	0x9		/* not authorized */
 #define SMT_RDF_LENGTH	0x0a		/* length error */
 #define SMT_RDF_TOOLONG	0x0b		/* length error */
 #define SMT_RDF_SBA	0x0d		/* SBA denied */
@@ -450,7 +450,7 @@ struct smt_p_version {
 
 struct smt_p_0015 {
 	struct smt_para	para ;		/* generic parameter header */
-	u_int		res_type ;	/* recsource type */
+	u_int		res_type ;	/* resource type */
 } ;
 
 #define	SYNC_BW		0x00000001L	/* Synchronous Bandwidth */
@@ -489,7 +489,7 @@ struct smt_p_0017 {
 struct smt_p_0018 {
 	struct smt_para	para ;		/* generic parameter header */
 	int		sba_ov_req ;	/* total sync bandwidth req for overhead*/
-} ;					/* measuered in bytes per T_Neg */
+} ;					/* measured in bytes per T_Neg */
 
 /*
  * P19 : SBA Allocation Address
@@ -562,7 +562,7 @@ struct smt_p_fsc {
 #define FSC_TYPE2	2		/* Special A/C indicator forwarding */
 
 /*
- * P00 21 : user defined authoriziation (see pmf.c)
+ * P00 21 : user defined authorization (see pmf.c)
  */
 #define SMT_P_AUTHOR	0x0021
 
@@ -764,10 +764,8 @@ struct smt_sif_operation {
 	struct smt_p_setcount	setcount ;	 /* Set Count mandatory */
 #endif
 	/* must be last */
-	struct smt_p_lem	lem[1] ;	/* phy lem status */
+	struct smt_p_lem	lem[];		/* phy lem status */
 } ;
-#define SIZEOF_SMT_SIF_OPERATION	(sizeof(struct smt_sif_operation)- \
-					 sizeof(struct smt_p_lem))
 
 /*
  * ECF : echo frame
diff --git a/drivers/net/fddi/skfp/smt.c b/drivers/net/fddi/skfp/smt.c
index 774a6e3b0a67..6b68a53f1b38 100644
--- a/drivers/net/fddi/skfp/smt.c
+++ b/drivers/net/fddi/skfp/smt.c
@@ -1063,9 +1063,9 @@ static void smt_send_sif_operation(struct s_smc *smc, struct fddi_addr *dest,
 #endif
 
 	if (!(mb = smt_build_frame(smc,SMT_SIF_OPER,SMT_REPLY,
-		SIZEOF_SMT_SIF_OPERATION+ports*sizeof(struct smt_p_lem))))
+				   struct_size(sif, lem, ports))))
 		return ;
-	sif = smtod(mb, struct smt_sif_operation *) ;
+	sif = smtod(mb, typeof(sif));
 	smt_fill_timestamp(smc,&sif->ts) ;	/* set time stamp */
 	smt_fill_mac_status(smc,&sif->status) ; /* set mac status */
 	smt_fill_mac_counter(smc,&sif->mc) ; /* set mac counter field */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index c64cc7639c39..5bce24731502 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1017,6 +1017,26 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
 }
 
 /* RCU already held by caller */
+/* Batching/bouncing logic is designed to attempt to optimize
+ * performance.
+ *
+ * For small, non-LSO packets we copy the packet to a send buffer
+ * which is pre-registered with the Hyper-V side. This enables the
+ * hypervisor to avoid remapping the aperture to access the packet
+ * descriptor and data.
+ *
+ * If we already started using a buffer and the netdev is transmitting
+ * a burst of packets, keep on copying into the buffer until it is
+ * full or we are done collecting a burst. If there is an existing
+ * buffer with space for the RNDIS descriptor but not the packet, copy
+ * the RNDIS descriptor to the buffer, keeping the packet in place.
+ *
+ * If we do batching and send more than one packet using a single
+ * NetVSC message, free the SKBs of the packets copied, except for the
+ * last packet. This is done to streamline the handling of the case
+ * where the last packet only had the RNDIS descriptor copied to the
+ * send buffer, with the data pointers included in the NetVSC message.
+ */
 int netvsc_send(struct net_device *ndev,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 15f262b70489..97b5c9b60503 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1612,34 +1612,23 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
-			memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+			ethtool_sprintf(&p, netvsc_stats[i].name);
 
-		for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
-			memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
+			ethtool_sprintf(&p, vf_stats[i].name);
 
 		for (i = 0; i < nvdev->num_chn; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_xdp_drop", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
 		}
 
 		for_each_present_cpu(cpu) {
-			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
-				sprintf(p, pcpu_stats[i].name, cpu);
-				p += ETH_GSTRING_LEN;
-			}
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++)
+				ethtool_sprintf(&p, pcpu_stats[i].name, cpu);
 		}
 
 		break;
diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
index b68f1289b89e..90a90262e0d0 100644
--- a/drivers/net/ipa/Kconfig
+++ b/drivers/net/ipa/Kconfig
@@ -1,6 +1,6 @@
 config QCOM_IPA
 	tristate "Qualcomm IPA support"
-	depends on 64BIT && NET && QCOM_SMEM
+	depends on NET && QCOM_SMEM
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST)
 	select QCOM_MDT_LOADER if ARCH_QCOM
diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 390d3403386a..7f2a8fce5e0d 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -351,7 +351,7 @@ void *gsi_ring_virt(struct gsi_ring *ring, u32 index)
 /* Return the 32-bit DMA address associated with a ring index */
 static u32 gsi_ring_addr(struct gsi_ring *ring, u32 index)
 {
-	return (ring->addr & GENMASK(31, 0)) + index * GSI_RING_ELEMENT_SIZE;
+	return lower_32_bits(ring->addr) + index * GSI_RING_ELEMENT_SIZE;
 }
 
 /* Return the ring index of a 32-bit ring offset */
@@ -708,10 +708,9 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id)
 	 * high-order 32 bits of the address of the event ring,
 	 * respectively.
 	 */
-	val = evt_ring->ring.addr & GENMASK(31, 0);
+	val = lower_32_bits(evt_ring->ring.addr);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_2_OFFSET(evt_ring_id));
-
-	val = evt_ring->ring.addr >> 32;
+	val = upper_32_bits(evt_ring->ring.addr);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_3_OFFSET(evt_ring_id));
 
 	/* Enable interrupt moderation by setting the moderation delay */
@@ -816,10 +815,9 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 	 * high-order 32 bits of the address of the channel ring,
 	 * respectively.
 	 */
-	val = channel->tre_ring.addr & GENMASK(31, 0);
+	val = lower_32_bits(channel->tre_ring.addr);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_2_OFFSET(channel_id));
-
-	val = channel->tre_ring.addr >> 32;
+	val = upper_32_bits(channel->tre_ring.addr);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_3_OFFSET(channel_id));
 
 	/* Command channel gets low weighted round-robin priority */
@@ -829,14 +827,14 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 
 	/* Max prefetch is 1 segment (do not set MAX_PREFETCH_FMASK) */
 
-	/* We enable the doorbell engine for IPA v3.5.1 */
-	if (gsi->version == IPA_VERSION_3_5_1 && doorbell)
+	/* No need to use the doorbell engine starting at IPA v4.0 */
+	if (gsi->version < IPA_VERSION_4_0 && doorbell)
 		val |= USE_DB_ENG_FMASK;
 
 	/* v4.0 introduces an escape buffer for prefetch.  We use it
 	 * on all but the AP command channel.
 	 */
-	if (gsi->version != IPA_VERSION_3_5_1 && !channel->command) {
+	if (gsi->version >= IPA_VERSION_4_0 && !channel->command) {
 		/* If not otherwise set, prefetch buffers are used */
 		if (gsi->version < IPA_VERSION_4_5)
 			val |= USE_ESCAPE_BUF_ONLY_FMASK;
@@ -975,7 +973,7 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
 
 	gsi_channel_reset_command(channel);
 	/* Due to a hardware quirk we may need to reset RX channels twice. */
-	if (gsi->version == IPA_VERSION_3_5_1 && !channel->toward_ipa)
+	if (gsi->version < IPA_VERSION_4_0 && !channel->toward_ipa)
 		gsi_channel_reset_command(channel);
 
 	gsi_channel_program(channel, doorbell);
@@ -1337,10 +1335,9 @@ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
 	int ret;
 
 	ret = platform_get_irq_byname(pdev, "gsi");
-	if (ret <= 0) {
-		dev_err(dev, "DT error %d getting \"gsi\" IRQ property\n", ret);
+	if (ret <= 0)
 		return ret ? : -EINVAL;
-	}
+
 	irq = ret;
 
 	ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
@@ -1366,7 +1363,7 @@ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
 	u32 tre_index;
 
 	/* Event xfer_ptr records the TRE it's associated with */
-	tre_offset = le64_to_cpu(event->xfer_ptr) & GENMASK(31, 0);
+	tre_offset = lower_32_bits(le64_to_cpu(event->xfer_ptr));
 	tre_index = gsi_ring_index(&channel->tre_ring, tre_offset);
 
 	return gsi_channel_trans_mapped(channel, tre_index);
@@ -1439,15 +1436,18 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
 /* Initialize a ring, including allocating DMA memory for its entries */
 static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count)
 {
-	size_t size = count * GSI_RING_ELEMENT_SIZE;
+	u32 size = count * GSI_RING_ELEMENT_SIZE;
 	struct device *dev = gsi->dev;
 	dma_addr_t addr;
 
-	/* Hardware requires a 2^n ring size, with alignment equal to size */
+	/* Hardware requires a 2^n ring size, with alignment equal to size.
+	 * The size is a power of 2, so we can check alignment using just
+	 * the bottom 32 bits for a DMA address of any size.
+	 */
 	ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL);
-	if (ring->virt && addr % size) {
+	if (ring->virt && lower_32_bits(addr) % size) {
 		dma_free_coherent(dev, size, ring->virt, addr);
-		dev_err(dev, "unable to alloc 0x%zx-aligned ring buffer\n",
+		dev_err(dev, "unable to alloc 0x%x-aligned ring buffer\n",
 			size);
 		return -EINVAL;	/* Not a good error value, but distinct */
 	} else if (!ring->virt) {
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index efc980f96109..d5996bdb20ef 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -16,8 +16,8 @@
 #include "ipa_version.h"
 
 /* Maximum number of channels and event rings supported by the driver */
-#define GSI_CHANNEL_COUNT_MAX	17
-#define GSI_EVT_RING_COUNT_MAX	13
+#define GSI_CHANNEL_COUNT_MAX	23
+#define GSI_EVT_RING_COUNT_MAX	20
 
 /* Maximum TLV FIFO size for a channel; 64 here is arbitrary (and high) */
 #define GSI_TLV_MAX		64
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index d73b03a80ef8..2ac6dd8413de 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -71,13 +71,12 @@ struct ipa_cmd_hw_hdr_init_local {
 
 /* IPA_CMD_REGISTER_WRITE */
 
-/* For IPA v4.0+, this opcode gets modified with pipeline clear options */
-
+/* For IPA v4.0+, the pipeline clear options are encoded in the opcode */
 #define REGISTER_WRITE_OPCODE_SKIP_CLEAR_FMASK		GENMASK(8, 8)
 #define REGISTER_WRITE_OPCODE_CLEAR_OPTION_FMASK	GENMASK(10, 9)
 
 struct ipa_cmd_register_write {
-	__le16 flags;		/* Unused/reserved for IPA v3.5.1 */
+	__le16 flags;		/* Unused/reserved prior to IPA v4.0 */
 	__le16 offset;
 	__le32 value;
 	__le32 value_mask;
@@ -85,12 +84,12 @@ struct ipa_cmd_register_write {
 };
 
 /* Field masks for ipa_cmd_register_write structure fields */
-/* The next field is present for IPA v4.0 and above */
+/* The next field is present for IPA v4.0+ */
 #define REGISTER_WRITE_FLAGS_OFFSET_HIGH_FMASK		GENMASK(14, 11)
-/* The next field is present for IPA v3.5.1 only */
+/* The next field is not present for IPA v4.0+ */
 #define REGISTER_WRITE_FLAGS_SKIP_CLEAR_FMASK		GENMASK(15, 15)
 
-/* The next field and its values are present for IPA v3.5.1 only */
+/* The next field and its values are not present for IPA v4.0+ */
 #define REGISTER_WRITE_CLEAR_OPTIONS_FMASK		GENMASK(1, 0)
 
 /* IPA_CMD_IP_PACKET_INIT */
@@ -123,7 +122,7 @@ struct ipa_cmd_hw_dma_mem_mem {
 
 /* Field masks for ipa_cmd_hw_dma_mem_mem structure fields */
 #define DMA_SHARED_MEM_FLAGS_DIRECTION_FMASK		GENMASK(0, 0)
-/* The next two fields are present for IPA v3.5.1 only. */
+/* The next two fields are not present for IPA v4.0+ */
 #define DMA_SHARED_MEM_FLAGS_SKIP_CLEAR_FMASK		GENMASK(1, 1)
 #define DMA_SHARED_MEM_FLAGS_CLEAR_OPTIONS_FMASK	GENMASK(3, 2)
 
@@ -253,11 +252,12 @@ static bool ipa_cmd_register_write_offset_valid(struct ipa *ipa,
 	u32 bit_count;
 
 	/* The maximum offset in a register_write immediate command depends
-	 * on the version of IPA.  IPA v3.5.1 supports a 16 bit offset, but
-	 * newer versions allow some additional high-order bits.
+	 * on the version of IPA.  A 16 bit offset is always supported,
+	 * but starting with IPA v4.0 some additional high-order bits are
+	 * allowed.
 	 */
 	bit_count = BITS_PER_BYTE * sizeof(payload->offset);
-	if (ipa->version != IPA_VERSION_3_5_1)
+	if (ipa->version >= IPA_VERSION_4_0)
 		bit_count += hweight32(REGISTER_WRITE_FLAGS_OFFSET_HIGH_FMASK);
 	BUILD_BUG_ON(bit_count > 32);
 	offset_max = ~0U >> (32 - bit_count);
@@ -456,7 +456,11 @@ void ipa_cmd_register_write_add(struct gsi_trans *trans, u32 offset, u32 value,
 	/* pipeline_clear_src_grp is not used */
 	clear_option = clear_full ? pipeline_clear_full : pipeline_clear_hps;
 
-	if (ipa->version != IPA_VERSION_3_5_1) {
+	/* IPA v4.0+ represents the pipeline clear options in the opcode.  It
+	 * also supports a larger offset by encoding additional high-order
+	 * bits in the payload flags field.
+	 */
+	if (ipa->version >= IPA_VERSION_4_0) {
 		u16 offset_high;
 		u32 val;
 
diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 997b51ceb7d7..621ad15c9e67 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -9,6 +9,15 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* QSB configuration for the SC7180 SoC. */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 12,
+		/* no outstanding read byte (beat) limit */
+	},
+};
+
 /* Endpoint configuration for the SC7180 SoC. */
 static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	[IPA_ENDPOINT_AP_COMMAND_TX] = {
@@ -22,11 +31,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA_ONLY,
 			.config = {
 				.resource_group	= 0,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
 			},
 		},
 	},
@@ -41,7 +52,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 6,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.aggregation	= true,
@@ -64,14 +74,14 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	=
-				IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
 				.tx = {
+					.seq_type = IPA_SEQ_1_PASS_SKIP_LAST_UC,
+					.seq_rep_type = IPA_SEQ_REP_DMA_PARSER,
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
 				},
@@ -89,7 +99,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 6,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
@@ -206,7 +215,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_INFO] = {
 		.offset		= 0x0080,
 		.size		= 0x0200,
-		.canary_count	= 2,
+		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
 		.offset		= 0x0288,
@@ -253,11 +262,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
-		.offset		= 0x05e8,
-		.size		= 0x0000,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
 		.offset		= 0x05f0,
 		.size		= 0x0200,
@@ -273,7 +277,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0050,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_STATS_QUOTA] = {
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
 		.offset		= 0x0a50,
 		.size		= 0x0060,
 		.canary_count	= 2,
@@ -283,11 +287,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_DROP] = {
-		.offset		= 0x0bf0,
-		.size		= 0,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM] = {
 		.offset		= 0x0bf0,
 		.size		= 0x140c,
@@ -300,7 +299,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	},
 };
 
-static struct ipa_mem_data ipa_mem_data = {
+static const struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146a8000,
@@ -310,7 +309,7 @@ static struct ipa_mem_data ipa_mem_data = {
 };
 
 /* Interconnect bandwidths are in 1000 byte/second units */
-static struct ipa_interconnect_data ipa_interconnect_data[] = {
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 	{
 		.name			= "memory",
 		.peak_bandwidth		= 465000,	/* 465 MBps */
@@ -329,7 +328,7 @@ static struct ipa_interconnect_data ipa_interconnect_data[] = {
 	},
 };
 
-static struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_clock_data ipa_clock_data = {
 	.core_clock_rate	= 100 * 1000 * 1000,	/* Hz */
 	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
 	.interconnect_data	= ipa_interconnect_data,
@@ -338,6 +337,8 @@ static struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SC7180 SoC. */
 const struct ipa_data ipa_data_sc7180 = {
 	.version	= IPA_VERSION_4_2,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 88c9c3562ab7..6b5173f47444 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -11,6 +11,18 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* QSB configuration for the SDM845 SoC. */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 8,
+	},
+	[IPA_QSB_MASTER_PCIE] = {
+		.max_writes	= 4,
+		.max_reads	= 12,
+	},
+};
+
 /* Endpoint configuration for the SDM845 SoC. */
 static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	[IPA_ENDPOINT_AP_COMMAND_TX] = {
@@ -24,11 +36,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA_ONLY,
 			.config = {
 				.resource_group	= 1,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
 			},
 		},
 	},
@@ -43,7 +57,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 8,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.aggregation	= true,
@@ -66,14 +79,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	=
-				IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
 				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
 				},
@@ -91,7 +103,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 8,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
@@ -293,11 +304,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
-		.offset		= 0x07c8,
-		.size		= 0x0000,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
 		.offset		= 0x07d0,
 		.size		= 0x0200,
@@ -320,7 +326,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	},
 };
 
-static struct ipa_mem_data ipa_mem_data = {
+static const struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146bd000,
@@ -330,7 +336,7 @@ static struct ipa_mem_data ipa_mem_data = {
 };
 
 /* Interconnect bandwidths are in 1000 byte/second units */
-static struct ipa_interconnect_data ipa_interconnect_data[] = {
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 	{
 		.name			= "memory",
 		.peak_bandwidth		= 600000,	/* 600 MBps */
@@ -349,7 +355,7 @@ static struct ipa_interconnect_data ipa_interconnect_data[] = {
 	},
 };
 
-static struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_clock_data ipa_clock_data = {
 	.core_clock_rate	= 75 * 1000 * 1000,	/* Hz */
 	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
 	.interconnect_data	= ipa_interconnect_data,
@@ -358,6 +364,8 @@ static struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SDM845 SoC. */
 const struct ipa_data ipa_data_sdm845 = {
 	.version	= IPA_VERSION_3_5_1,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index b476fc373f7f..7816583fc14a 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -18,8 +18,9 @@
  * Boot-time configuration data is used to define the configuration of the
  * IPA and GSI resources to use for a given platform.  This data is supplied
  * via the Device Tree match table, associated with a particular compatible
- * string.  The data defines information about resources, endpoints, and
- * channels.
+ * string.  The data defines information about how resources, endpoints and
+ * channels, memory, clocking and so on are allocated and used for the
+ * platform.
  *
  * Resources are data structures used internally by the IPA hardware.  The
  * configuration data defines the number (or limits of the number) of various
@@ -49,6 +50,24 @@
 #define IPA_RESOURCE_GROUP_SRC_MAX	5
 #define IPA_RESOURCE_GROUP_DST_MAX	5
 
+/** enum ipa_qsb_master_id - array index for IPA QSB configuration data */
+enum ipa_qsb_master_id {
+	IPA_QSB_MASTER_DDR,
+	IPA_QSB_MASTER_PCIE,
+};
+
+/**
+ * struct ipa_qsb_data - Qualcomm System Bus configuration data
+ * @max_writes:	Maximum outstanding write requests for this master
+ * @max_reads:	Maximum outstanding read requests for this master
+ * @max_reads_beats: Max outstanding read bytes in 8-byte "beats" (if non-zero)
+ */
+struct ipa_qsb_data {
+	u8 max_writes;
+	u8 max_reads;
+	u8 max_reads_beats;		/* Not present for IPA v3.5.1 */
+};
+
 /**
  * struct gsi_channel_data - GSI channel configuration data
  * @tre_count:		number of TREs in the channel ring
@@ -57,10 +76,10 @@
  *
  * A GSI channel is a unidirectional means of transferring data to or
  * from (and through) the IPA.  A GSI channel has a ring buffer made
- * up of "transfer elements" (TREs) that specify individual data transfers
- * or IPA immediate commands.  TREs are filled by the AP, and control
- * is passed to IPA hardware by writing the last written element
- * into a doorbell register.
+ * up of "transfer ring elements" (TREs) that specify individual data
+ * transfers or IPA immediate commands.  TREs are filled by the AP,
+ * and control is passed to IPA hardware by writing the last written
+ * element into a doorbell register.
  *
  * When data transfer commands have completed the GSI generates an
  * event (a structure of data) and optionally signals the AP with
@@ -79,12 +98,16 @@ struct gsi_channel_data {
 
 /**
  * struct ipa_endpoint_tx_data - configuration data for TX endpoints
+ * @seq_type:		primary packet processing sequencer type
+ * @seq_rep_type:	sequencer type for replication processing
  * @status_endpoint:	endpoint to which status elements are sent
  *
  * The @status_endpoint is only valid if the endpoint's @status_enable
  * flag is set.
  */
 struct ipa_endpoint_tx_data {
+	enum ipa_seq_type seq_type;
+	enum ipa_seq_rep_type seq_rep_type;
 	enum ipa_endpoint_name status_endpoint;
 };
 
@@ -136,7 +159,6 @@ struct ipa_endpoint_config_data {
 /**
  * struct ipa_endpoint_data - IPA endpoint configuration data
  * @filter_support:	whether endpoint supports filtering
- * @seq_type:		hardware sequencer type used for endpoint
  * @config:		hardware configuration (see above)
  *
  * Not all endpoints support the IPA filtering capability.  A filter table
@@ -146,14 +168,10 @@ struct ipa_endpoint_config_data {
  * in the system, and indicate whether they support filtering.
  *
  * The remaining endpoint configuration data applies only to AP endpoints.
- * The IPA hardware is implemented by sequencers, and the AP must program
- * the type(s) of these sequencers at initialization time.  The remaining
- * endpoint configuration data is defined above.
  */
 struct ipa_endpoint_data {
 	bool filter_support;
-	/* The next two are specified only for AP endpoints */
-	enum ipa_seq_type seq_type;
+	/* Everything else is specified only for AP endpoints */
 	struct ipa_endpoint_config_data config;
 };
 
@@ -247,7 +265,7 @@ struct ipa_resource_data {
  * @imem_addr:		physical address of IPA region within IMEM
  * @imem_size:		size in bytes of IPA IMEM region
  * @smem_id:		item identifier for IPA region within SMEM memory
- * @imem_size:		size in bytes of the IPA SMEM region
+ * @smem_size:		size in bytes of the IPA SMEM region
  */
 struct ipa_mem_data {
 	u32 local_count;
@@ -285,15 +303,19 @@ struct ipa_clock_data {
 /**
  * struct ipa_data - combined IPA/GSI configuration data
  * @version:		IPA hardware version
- * @endpoint_count:	number of entries in endpoint_data array
+ * @qsb_count:		number of entries in the qsb_data array
+ * @qsb_data:		Qualcomm System Bus configuration data
+ * @endpoint_count:	number of entries in the endpoint_data array
  * @endpoint_data:	IPA endpoint/GSI channel data
  * @resource_data:	IPA resource configuration data
- * @mem_count:		number of entries in mem_data array
- * @mem_data:		IPA-local shared memory region data
+ * @mem_data:		IPA memory region data
+ * @clock_data:		IPA clock and interconnect data
  */
 struct ipa_data {
 	enum ipa_version version;
-	u32 endpoint_count;	/* # entries in endpoint_data[] */
+	u32 qsb_count;		/* number of entries in qsb_data[] */
+	const struct ipa_qsb_data *qsb_data;
+	u32 endpoint_count;	/* number of entries in endpoint_data[] */
 	const struct ipa_gsi_endpoint_data *endpoint_data;
 	const struct ipa_resource_data *resource_data;
 	const struct ipa_mem_data *mem_data;
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 7209ee3c3124..38e83cd467b5 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 
 #include <linux/types.h>
@@ -266,7 +266,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 	 * if (endpoint->toward_ipa)
 	 * 	assert(ipa->version != IPA_VERSION_4.2);
 	 * else
-	 * 	assert(ipa->version == IPA_VERSION_3_5_1);
+	 *	assert(ipa->version < IPA_VERSION_4_0);
 	 */
 	mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
 
@@ -347,7 +347,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable)
 {
 	bool suspended;
 
-	if (endpoint->ipa->version != IPA_VERSION_3_5_1)
+	if (endpoint->ipa->version >= IPA_VERSION_4_0)
 		return enable;	/* For IPA v4.0+, no change made */
 
 	/* assert(!endpoint->toward_ipa); */
@@ -468,6 +468,20 @@ static void ipa_endpoint_init_cfg(struct ipa_endpoint *endpoint)
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
 
+static void ipa_endpoint_init_nat(struct ipa_endpoint *endpoint)
+{
+	u32 offset;
+	u32 val;
+
+	if (!endpoint->toward_ipa)
+		return;
+
+	offset = IPA_REG_ENDP_INIT_NAT_N_OFFSET(endpoint->endpoint_id);
+	val = u32_encode_bits(IPA_NAT_BYPASS, NAT_EN_FMASK);
+
+	iowrite32(val, endpoint->ipa->reg_virt + offset);
+}
+
 /**
  * ipa_endpoint_init_hdr() - Initialize HDR endpoint configuration register
  * @endpoint:	Endpoint pointer
@@ -515,7 +529,7 @@ static void ipa_endpoint_init_hdr(struct ipa_endpoint *endpoint)
 			/* Where IPA will write the length */
 			offset = offsetof(struct rmnet_map_header, pkt_len);
 			/* Upper bits are stored in HDR_EXT with IPA v4.5 */
-			if (version == IPA_VERSION_4_5)
+			if (version >= IPA_VERSION_4_5)
 				offset &= field_mask(HDR_OFST_PKT_SIZE_FMASK);
 
 			val |= HDR_OFST_PKT_SIZE_VALID_FMASK;
@@ -562,7 +576,7 @@ static void ipa_endpoint_init_hdr_ext(struct ipa_endpoint *endpoint)
 	/* IPA v4.5 adds some most-significant bits to a few fields,
 	 * two of which are defined in the HDR (not HDR_EXT) register.
 	 */
-	if (ipa->version == IPA_VERSION_4_5) {
+	if (ipa->version >= IPA_VERSION_4_5) {
 		/* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0, so MSB is 0 */
 		if (endpoint->data->qmap && !endpoint->toward_ipa) {
 			u32 offset;
@@ -776,7 +790,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds)
 	if (!microseconds)
 		return 0;	/* Nothing to compute if timer period is 0 */
 
-	if (ipa->version == IPA_VERSION_4_5)
+	if (ipa->version >= IPA_VERSION_4_5)
 		return hol_block_timer_qtime_val(ipa, microseconds);
 
 	/* Use 64 bit arithmetic to avoid overflow... */
@@ -884,18 +898,17 @@ static void ipa_endpoint_init_rsrc_grp(struct ipa_endpoint *endpoint)
 static void ipa_endpoint_init_seq(struct ipa_endpoint *endpoint)
 {
 	u32 offset = IPA_REG_ENDP_INIT_SEQ_N_OFFSET(endpoint->endpoint_id);
-	u32 seq_type = endpoint->seq_type;
 	u32 val = 0;
 
 	if (!endpoint->toward_ipa)
 		return;		/* Register not valid for RX endpoints */
 
-	/* Sequencer type is made up of four nibbles */
-	val |= u32_encode_bits(seq_type & 0xf, HPS_SEQ_TYPE_FMASK);
-	val |= u32_encode_bits((seq_type >> 4) & 0xf, DPS_SEQ_TYPE_FMASK);
-	/* The second two apply to replicated packets */
-	val |= u32_encode_bits((seq_type >> 8) & 0xf, HPS_REP_SEQ_TYPE_FMASK);
-	val |= u32_encode_bits((seq_type >> 12) & 0xf, DPS_REP_SEQ_TYPE_FMASK);
+	/* Low-order byte configures primary packet processing */
+	val |= u32_encode_bits(endpoint->data->tx.seq_type, SEQ_TYPE_FMASK);
+
+	/* Second byte configures replicated packet processing */
+	val |= u32_encode_bits(endpoint->data->tx.seq_rep_type,
+			       SEQ_REP_TYPE_FMASK);
 
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
@@ -1469,8 +1482,7 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 	 * is active, we need to handle things specially to recover.
 	 * All other cases just need to reset the underlying GSI channel.
 	 */
-	special = ipa->version == IPA_VERSION_3_5_1 &&
-			!endpoint->toward_ipa &&
+	special = ipa->version < IPA_VERSION_4_0 && !endpoint->toward_ipa &&
 			endpoint->data->aggregation;
 	if (special && ipa_endpoint_aggr_active(endpoint))
 		ret = ipa_endpoint_reset_rx_aggr(endpoint);
@@ -1490,6 +1502,7 @@ static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 	else
 		(void)ipa_endpoint_program_suspend(endpoint, false);
 	ipa_endpoint_init_cfg(endpoint);
+	ipa_endpoint_init_nat(endpoint);
 	ipa_endpoint_init_hdr(endpoint);
 	ipa_endpoint_init_hdr_ext(endpoint);
 	ipa_endpoint_init_hdr_metadata_mask(endpoint);
@@ -1568,8 +1581,10 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 		(void)ipa_endpoint_program_suspend(endpoint, true);
 	}
 
-	/* IPA v3.5.1 doesn't use channel stop for suspend */
-	stop_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
+	/* Starting with IPA v4.0, endpoints are suspended by stopping the
+	 * underlying GSI channel rather than using endpoint suspend mode.
+	 */
+	stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
 	ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
 	if (ret)
 		dev_err(dev, "error %d suspending channel %u\n", ret,
@@ -1589,8 +1604,10 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 	if (!endpoint->toward_ipa)
 		(void)ipa_endpoint_program_suspend(endpoint, false);
 
-	/* IPA v3.5.1 doesn't use channel start for resume */
-	start_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
+	/* Starting with IPA v4.0, the underlying GSI channel must be
+	 * restarted for resume.
+	 */
+	start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
 	ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
 	if (ret)
 		dev_err(dev, "error %d resuming channel %u\n", ret,
@@ -1766,7 +1783,6 @@ static void ipa_endpoint_init_one(struct ipa *ipa, enum ipa_endpoint_name name,
 
 	endpoint->ipa = ipa;
 	endpoint->ee_id = data->ee_id;
-	endpoint->seq_type = data->endpoint.seq_type;
 	endpoint->channel_id = data->channel_id;
 	endpoint->endpoint_id = data->endpoint_id;
 	endpoint->toward_ipa = data->toward_ipa;
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 881ecc27bd6e..c6c55ea35394 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -46,7 +46,6 @@ enum ipa_endpoint_name {
  */
 struct ipa_endpoint {
 	struct ipa *ipa;
-	enum ipa_seq_type seq_type;
 	enum gsi_ee_id ee_id;
 	u32 channel_id;
 	u32 endpoint_id;
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 97c1b55405cb..ba1bfc30210a 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -227,8 +227,8 @@ static void ipa_hardware_config_comp(struct ipa *ipa)
 {
 	u32 val;
 
-	/* Nothing to configure for IPA v3.5.1 */
-	if (ipa->version == IPA_VERSION_3_5_1)
+	/* Nothing to configure prior to IPA v4.0 */
+	if (ipa->version < IPA_VERSION_4_0)
 		return;
 
 	val = ioread32(ipa->reg_virt + IPA_REG_COMP_CFG_OFFSET);
@@ -249,56 +249,59 @@ static void ipa_hardware_config_comp(struct ipa *ipa)
 	iowrite32(val, ipa->reg_virt + IPA_REG_COMP_CFG_OFFSET);
 }
 
-/* Configure DDR and PCIe max read/write QSB values */
-static void ipa_hardware_config_qsb(struct ipa *ipa)
+/* Configure DDR and (possibly) PCIe max read/write QSB values */
+static void
+ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
 {
-	enum ipa_version version = ipa->version;
-	u32 max0;
-	u32 max1;
+	const struct ipa_qsb_data *data0;
+	const struct ipa_qsb_data *data1;
 	u32 val;
 
-	/* QMB_0 represents DDR; QMB_1 represents PCIe */
-	val = u32_encode_bits(8, GEN_QMB_0_MAX_WRITES_FMASK);
-	switch (version) {
-	case IPA_VERSION_4_2:
-		max1 = 0;		/* PCIe not present */
-		break;
-	case IPA_VERSION_4_5:
-		max1 = 8;
-		break;
-	default:
-		max1 = 4;
-		break;
-	}
-	val |= u32_encode_bits(max1, GEN_QMB_1_MAX_WRITES_FMASK);
+	/* assert(data->qsb_count > 0); */
+	/* assert(data->qsb_count < 3); */
+
+	/* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */
+	data0 = &data->qsb_data[IPA_QSB_MASTER_DDR];
+	if (data->qsb_count > 1)
+		data1 = &data->qsb_data[IPA_QSB_MASTER_PCIE];
+
+	/* Max outstanding write accesses for QSB masters */
+	val = u32_encode_bits(data0->max_writes, GEN_QMB_0_MAX_WRITES_FMASK);
+	if (data->qsb_count > 1)
+		val |= u32_encode_bits(data1->max_writes,
+				       GEN_QMB_1_MAX_WRITES_FMASK);
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_WRITES_OFFSET);
 
-	max1 = 12;
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-		max0 = 8;
-		break;
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		max0 = 12;
-		break;
-	case IPA_VERSION_4_2:
-		max0 = 12;
-		max1 = 0;		/* PCIe not present */
-		break;
-	case IPA_VERSION_4_5:
-		max0 = 0;		/* No limit (hardware maximum) */
-		break;
-	}
-	val = u32_encode_bits(max0, GEN_QMB_0_MAX_READS_FMASK);
-	val |= u32_encode_bits(max1, GEN_QMB_1_MAX_READS_FMASK);
-	if (version != IPA_VERSION_3_5_1) {
-		/* GEN_QMB_0_MAX_READS_BEATS is 0 */
-		/* GEN_QMB_1_MAX_READS_BEATS is 0 */
+	/* Max outstanding read accesses for QSB masters */
+	val = u32_encode_bits(data0->max_reads, GEN_QMB_0_MAX_READS_FMASK);
+	if (ipa->version >= IPA_VERSION_4_0)
+		val |= u32_encode_bits(data0->max_reads_beats,
+				       GEN_QMB_0_MAX_READS_BEATS_FMASK);
+	if (data->qsb_count > 1) {
+		val |= u32_encode_bits(data1->max_reads,
+				       GEN_QMB_1_MAX_READS_FMASK);
+		if (ipa->version >= IPA_VERSION_4_0)
+			val |= u32_encode_bits(data1->max_reads_beats,
+					       GEN_QMB_1_MAX_READS_BEATS_FMASK);
 	}
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_READS_OFFSET);
 }
 
+/* The internal inactivity timer clock is used for the aggregation timer */
+#define TIMER_FREQUENCY	32000		/* 32 KHz inactivity timer clock */
+
+/* Compute the value to use in the COUNTER_CFG register AGGR_GRANULARITY
+ * field to represent the given number of microseconds.  The value is one
+ * less than the number of timer ticks in the requested period.  0 is not
+ * a valid granularity value.
+ */
+static u32 ipa_aggr_granularity_val(u32 usec)
+{
+	/* assert(usec != 0); */
+
+	return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
+}
+
 /* IPA uses unified Qtime starting at IPA v4.5, implementing various
  * timestamps and timers independent of the IPA core clock rate.  The
  * Qtimer is based on a 56-bit timestamp incremented at each tick of
@@ -385,8 +388,9 @@ static void ipa_hardware_dcd_deconfig(struct ipa *ipa)
 /**
  * ipa_hardware_config() - Primitive hardware initialization
  * @ipa:	IPA pointer
+ * @data:	IPA configuration data
  */
-static void ipa_hardware_config(struct ipa *ipa)
+static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data)
 {
 	enum ipa_version version = ipa->version;
 	u32 granularity;
@@ -399,7 +403,7 @@ static void ipa_hardware_config(struct ipa *ipa)
 	}
 
 	/* Implement some hardware workarounds */
-	if (version != IPA_VERSION_3_5_1 && version < IPA_VERSION_4_5) {
+	if (version >= IPA_VERSION_4_0 && version < IPA_VERSION_4_5) {
 		/* Enable open global clocks (not needed for IPA v4.5) */
 		val = GLOBAL_FMASK;
 		val |= GLOBAL_2X_CLK_FMASK;
@@ -414,7 +418,7 @@ static void ipa_hardware_config(struct ipa *ipa)
 	ipa_hardware_config_comp(ipa);
 
 	/* Configure system bus limits */
-	ipa_hardware_config_qsb(ipa);
+	ipa_hardware_config_qsb(ipa, data);
 
 	if (version < IPA_VERSION_4_5) {
 		/* Configure aggregation timer granularity */
@@ -610,7 +614,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 	 */
 	ipa_clock_get(ipa);
 
-	ipa_hardware_config(ipa);
+	ipa_hardware_config(ipa, data);
 
 	ret = ipa_endpoint_config(ipa);
 	if (ret)
@@ -735,8 +739,14 @@ MODULE_DEVICE_TABLE(of, ipa_match);
 static void ipa_validate_build(void)
 {
 #ifdef IPA_VALIDATE
-	/* We assume we're working on 64-bit hardware */
-	BUILD_BUG_ON(!IS_ENABLED(CONFIG_64BIT));
+	/* At one time we assumed a 64-bit build, allowing some do_div()
+	 * calls to be replaced by simple division or modulo operations.
+	 * We currently only perform divide and modulo operations on u32,
+	 * u16, or size_t objects, and of those only size_t has any chance
+	 * of being a 64-bit value.  (It should be guaranteed 32 bits wide
+	 * on a 32-bit build, but there is no harm in verifying that.)
+	 */
+	BUILD_BUG_ON(!IS_ENABLED(CONFIG_64BIT) && sizeof(size_t) != 4);
 
 	/* Code assumes the EE ID for the AP is 0 (zeroed structure field) */
 	BUILD_BUG_ON(GSI_EE_AP != 0);
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index f25029b9ec85..32907dde5dc6 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -61,6 +61,7 @@ int ipa_mem_setup(struct ipa *ipa)
 	struct gsi_trans *trans;
 	u32 offset;
 	u16 size;
+	u32 val;
 
 	/* Get a transaction to define the header memory region and to zero
 	 * the processing context and modem memory regions.
@@ -89,8 +90,9 @@ int ipa_mem_setup(struct ipa *ipa)
 	gsi_trans_commit_wait(trans);
 
 	/* Tell the hardware where the processing context area is located */
-	iowrite32(ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset,
-		  ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET);
+	offset = ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset;
+	val = proc_cntxt_base_addr_encoded(ipa->version, offset);
+	iowrite32(val, ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET);
 
 	return 0;
 }
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index f99180f84f0d..f82e8939622b 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -28,6 +28,7 @@ struct ipa_mem_data;
  * The set of memory regions is defined in configuration data.  They are
  * subject to these constraints:
  * - a zero offset and zero size represents and undefined region
+ * - a region's size does not include space for its "canary" values
  * - a region's offset is defined to be *past* all "canary" values
  * - offset must be large enough to account for all canaries
  * - a region's size may be zero, but may still have canaries
@@ -56,9 +57,16 @@ enum ipa_mem_id {
 	IPA_MEM_AP_HEADER,		/* 0 canaries */
 	IPA_MEM_MODEM_PROC_CTX,		/* 2 canaries */
 	IPA_MEM_AP_PROC_CTX,		/* 0 canaries */
+	IPA_MEM_NAT_TABLE,		/* 4 canaries (IPA v4.5 and above) */
 	IPA_MEM_PDN_CONFIG,		/* 2 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_QUOTA,		/* 2 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_QUOTA_MODEM,	/* 2 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_QUOTA_AP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_STATS_TETHERING,	/* 0 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_V4_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V6_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V4_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V6_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_FILTER_ROUTE,	/* 0 canaries (IPA v4.5 and above) */
 	IPA_MEM_STATS_DROP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_MODEM,			/* 0 canaries */
 	IPA_MEM_UC_EVENT_RING,		/* 1 canary */
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index e594bf3b600f..ccdb4a6a4c75 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -379,8 +379,8 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 
 	/* None of the stats fields are valid (IPA v4.0 and above) */
 
-	if (ipa->version != IPA_VERSION_3_5_1) {
-		mem = &ipa->mem[IPA_MEM_STATS_QUOTA];
+	if (ipa->version >= IPA_VERSION_4_0) {
+		mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM];
 		if (mem->size) {
 			req.hw_stats_quota_base_addr_valid = 1;
 			req.hw_stats_quota_base_addr =
diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c
index 73413371e3d3..6838e8065072 100644
--- a/drivers/net/ipa/ipa_qmi_msg.c
+++ b/drivers/net/ipa/ipa_qmi_msg.c
@@ -56,7 +56,7 @@ struct qmi_elem_info ipa_indication_register_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_indication_register_req,
 				     ipa_mhi_ready_ind_valid),
-		.tlv_type	= 0x11,
+		.tlv_type	= 0x12,
 		.offset		= offsetof(struct ipa_indication_register_req,
 					   ipa_mhi_ready_ind_valid),
 	},
@@ -66,11 +66,51 @@ struct qmi_elem_info ipa_indication_register_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_indication_register_req,
 				     ipa_mhi_ready_ind),
-		.tlv_type	= 0x11,
+		.tlv_type	= 0x12,
 		.offset		= offsetof(struct ipa_indication_register_req,
 					   ipa_mhi_ready_ind),
 	},
 	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     endpoint_desc_ind_valid),
+		.tlv_type	= 0x13,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   endpoint_desc_ind_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     endpoint_desc_ind),
+		.tlv_type	= 0x13,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   endpoint_desc_ind),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     bw_change_ind_valid),
+		.tlv_type	= 0x14,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   bw_change_ind_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     bw_change_ind),
+		.tlv_type	= 0x14,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   bw_change_ind),
+	},
+	{
 		.data_type	= QMI_EOTI,
 	},
 };
@@ -530,7 +570,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
 					   hw_stats_quota_base_addr_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
@@ -545,17 +585,17 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_quota_size_valid),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x20,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_quota_size_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_quota_size),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x20,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_quota_size),
 	},
@@ -564,18 +604,38 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
+				     hw_stats_drop_base_addr_valid),
+		.tlv_type	= 0x21,
+		.offset		= offsetof(struct ipa_init_modem_driver_req,
+					   hw_stats_drop_base_addr_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_4_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_init_modem_driver_req,
+				     hw_stats_drop_base_addr),
+		.tlv_type	= 0x21,
+		.offset		= offsetof(struct ipa_init_modem_driver_req,
+					   hw_stats_drop_base_addr),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_drop_size_valid),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x22,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_drop_size_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_drop_size),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x22,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_drop_size),
 	},
diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h
index 12b6621f4b0e..3233d145fd87 100644
--- a/drivers/net/ipa/ipa_qmi_msg.h
+++ b/drivers/net/ipa/ipa_qmi_msg.h
@@ -24,7 +24,7 @@
  * information for each field.  The qmi_send_*() interfaces require
  * the message size to be provided.
  */
-#define IPA_QMI_INDICATION_REGISTER_REQ_SZ	12	/* -> server handle */
+#define IPA_QMI_INDICATION_REGISTER_REQ_SZ	20	/* -> server handle */
 #define IPA_QMI_INDICATION_REGISTER_RSP_SZ	7	/* <- server handle */
 #define IPA_QMI_INIT_DRIVER_REQ_SZ		162	/* client handle -> */
 #define IPA_QMI_INIT_DRIVER_RSP_SZ		25	/* client handle <- */
@@ -44,6 +44,10 @@ struct ipa_indication_register_req {
 	u8 data_usage_quota_reached;
 	u8 ipa_mhi_ready_ind_valid;
 	u8 ipa_mhi_ready_ind;
+	u8 endpoint_desc_ind_valid;
+	u8 endpoint_desc_ind;
+	u8 bw_change_ind_valid;
+	u8 bw_change_ind;
 };
 
 /* The response to a IPA_QMI_INDICATION_REGISTER request consists only of
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 732e691e9aa6..86fe2978e810 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
+ * Copyright (C) 2018-2021 Linaro Ltd.
  */
 #ifndef _IPA_REG_H_
 #define _IPA_REG_H_
@@ -217,8 +217,18 @@ static inline u32 ipa_reg_bcr_val(enum ipa_version version)
 	return 0x00000000;
 }
 
-/* The value of the next register must be a multiple of 8 */
-#define IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET	0x000001e8
+/* The value of the next register must be a multiple of 8 (bottom 3 bits 0) */
+#define IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET		0x000001e8
+
+/* Encoded value for LOCAL_PKT_PROC_CNTXT register BASE_ADDR field */
+static inline u32 proc_cntxt_base_addr_encoded(enum ipa_version version,
+					       u32 addr)
+{
+	if (version < IPA_VERSION_4_5)
+		return u32_encode_bits(addr, GENMASK(16, 0));
+
+	return u32_encode_bits(addr, GENMASK(17, 0));
+}
 
 /* ipa->available defines the valid bits in the AGGR_FORCE_CLOSE register */
 #define IPA_REG_AGGR_FORCE_CLOSE_OFFSET			0x000001ec
@@ -227,18 +237,6 @@ static inline u32 ipa_reg_bcr_val(enum ipa_version version)
 #define IPA_REG_COUNTER_CFG_OFFSET			0x000001f0
 #define AGGR_GRANULARITY_FMASK			GENMASK(8, 4)
 
-/* The internal inactivity timer clock is used for the aggregation timer */
-#define TIMER_FREQUENCY	32000		/* 32 KHz inactivity timer clock */
-
-/* Compute the value to use in the AGGR_GRANULARITY field representing the
- * given number of microseconds.  The value is one less than the number of
- * timer ticks in the requested period.  0 not a valid granularity value.
- */
-static inline u32 ipa_aggr_granularity_val(u32 usec)
-{
-	return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
-}
-
 /* The next register is not present for IPA v4.5 */
 #define IPA_REG_TX_CFG_OFFSET				0x000001fc
 /* The first three fields are present for IPA v3.5.1 only */
@@ -388,6 +386,18 @@ enum ipa_cs_offload_en {
 	IPA_CS_OFFLOAD_DL		= 0x2,
 };
 
+/* Valid only for TX (IPA consumer) endpoints */
+#define IPA_REG_ENDP_INIT_NAT_N_OFFSET(ep) \
+					(0x0000080c + 0x0070 * (ep))
+#define NAT_EN_FMASK				GENMASK(1, 0)
+
+/** enum ipa_nat_en - ENDP_INIT_NAT register NAT_EN field value */
+enum ipa_nat_en {
+	IPA_NAT_BYPASS			= 0x0,
+	IPA_NAT_SRC			= 0x1,
+	IPA_NAT_DST			= 0x2,
+};
+
 #define IPA_REG_ENDP_INIT_HDR_N_OFFSET(ep) \
 					(0x00000810 + 0x0070 * (ep))
 #define HDR_LEN_FMASK				GENMASK(5, 0)
@@ -580,28 +590,40 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
 /* Valid only for TX (IPA consumer) endpoints */
 #define IPA_REG_ENDP_INIT_SEQ_N_OFFSET(txep) \
 					(0x0000083c + 0x0070 * (txep))
-#define HPS_SEQ_TYPE_FMASK			GENMASK(3, 0)
-#define DPS_SEQ_TYPE_FMASK			GENMASK(7, 4)
-#define HPS_REP_SEQ_TYPE_FMASK			GENMASK(11, 8)
-#define DPS_REP_SEQ_TYPE_FMASK			GENMASK(15, 12)
+#define SEQ_TYPE_FMASK				GENMASK(7, 0)
+#define SEQ_REP_TYPE_FMASK			GENMASK(15, 8)
 
 /**
- * enum ipa_seq_type - HPS and DPS sequencer type fields in ENDP_INIT_SEQ_N
- * @IPA_SEQ_DMA_ONLY:		only DMA is performed
- * @IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP:
- *	second packet processing pass + no decipher + microcontroller
- * @IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP:
- *	packet processing + no decipher + no uCP + HPS REP DMA parser
- * @IPA_SEQ_INVALID:		invalid sequencer type
+ * enum ipa_seq_type - HPS and DPS sequencer type
+ *
+ * The low-order byte of the sequencer type register defines the number of
+ * passes a packet takes through the IPA pipeline.  The last pass through can
+ * optionally skip the microprocessor.  Deciphering is optional for all types;
+ * if enabled, an additional mask (two bits) is added to the type value.
  *
- * The values defined here are broken into 4-bit nibbles that are written
- * into fields of the ENDP_INIT_SEQ registers.
+ * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are
+ * supported (or meaningful).
  */
+#define IPA_SEQ_DECIPHER			0x11
 enum ipa_seq_type {
-	IPA_SEQ_DMA_ONLY			= 0x0000,
-	IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP	= 0x0004,
-	IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP	= 0x0806,
-	IPA_SEQ_INVALID				= 0xffff,
+	IPA_SEQ_DMA				= 0x00,
+	IPA_SEQ_1_PASS				= 0x02,
+	IPA_SEQ_2_PASS_SKIP_LAST_UC		= 0x04,
+	IPA_SEQ_1_PASS_SKIP_LAST_UC		= 0x06,
+	IPA_SEQ_2_PASS				= 0x0a,
+	IPA_SEQ_3_PASS_SKIP_LAST_UC		= 0x0c,
+};
+
+/**
+ * enum ipa_seq_rep_type - replicated packet sequencer type
+ *
+ * This goes in the second byte of the endpoint sequencer type register.
+ *
+ * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are
+ * supported (or meaningful).
+ */
+enum ipa_seq_rep_type {
+	IPA_SEQ_REP_DMA_PARSER			= 0x08,
 };
 
 #define IPA_REG_ENDP_STATUS_N_OFFSET(ep) \
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index baaab3dd0e63..4236a50ff03a 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -118,21 +118,15 @@
 /* Check things that can be validated at build time. */
 static void ipa_table_validate_build(void)
 {
-	/* IPA hardware accesses memory 128 bytes at a time.  Addresses
-	 * referred to by entries in filter and route tables must be
-	 * aligned on 128-byte byte boundaries.  The only rule address
-	 * ever use is the "zero rule", and it's aligned at the base
-	 * of a coherent DMA allocation.
+	/* Filter and route tables contain DMA addresses that refer
+	 * to filter or route rules.  But the size of a table entry
+	 * is 64 bits regardless of what the size of an AP DMA address
+	 * is.  A fixed constant defines the size of an entry, and
+	 * code in ipa_table_init() uses a pointer to __le64 to
+	 * initialize tables.
 	 */
-	BUILD_BUG_ON(ARCH_DMA_MINALIGN % IPA_TABLE_ALIGN);
-
-	/* Filter and route tables contain DMA addresses that refer to
-	 * filter or route rules.  We use a fixed constant to represent
-	 * the size of either type of table entry.  Code in ipa_table_init()
-	 * uses a pointer to __le64 to initialize table entriews.
-	 */
-	BUILD_BUG_ON(IPA_TABLE_ENTRY_SIZE != sizeof(dma_addr_t));
-	BUILD_BUG_ON(sizeof(dma_addr_t) != sizeof(__le64));
+	BUILD_BUG_ON(sizeof(dma_addr_t) > IPA_TABLE_ENTRY_SIZE);
+	BUILD_BUG_ON(sizeof(__le64) != IPA_TABLE_ENTRY_SIZE);
 
 	/* A "zero rule" is used to represent no filtering or no routing.
 	 * It is a 64-bit block of zeroed memory.  Code in ipa_table_init()
@@ -239,11 +233,6 @@ static void ipa_table_validate_build(void)
 
 #endif /* !IPA_VALIDATE */
 
-bool ipa_table_hash_support(struct ipa *ipa)
-{
-	return ipa->version != IPA_VERSION_4_2;
-}
-
 /* Zero entry count means no table, so just return a 0 address */
 static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
 {
@@ -668,6 +657,21 @@ int ipa_table_init(struct ipa *ipa)
 	if (!virt)
 		return -ENOMEM;
 
+	/* We put the "zero rule" at the base of our table area.  The IPA
+	 * hardware requires route and filter table rules to be aligned
+	 * on a 128-byte boundary.  As long as the alignment constraint
+	 * is a power of 2, we can check alignment using just the bottom
+	 * 32 bits for a DMA address of any size.
+	 */
+	BUILD_BUG_ON(!is_power_of_2(IPA_TABLE_ALIGN));
+	if (lower_32_bits(addr) % IPA_TABLE_ALIGN) {
+		dev_err(dev, "table address %pad not %u-byte aligned\n",
+			&addr, IPA_TABLE_ALIGN);
+		dma_free_coherent(dev, size, virt, addr);
+
+		return -ERANGE;
+	}
+
 	ipa->table_virt = virt;
 	ipa->table_addr = addr;
 
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index 1a68d20f19d6..889c2e93b122 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -55,7 +55,10 @@ static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask)
  * ipa_table_hash_support() - Return true if hashed tables are supported
  * @ipa:	IPA pointer
  */
-bool ipa_table_hash_support(struct ipa *ipa);
+static inline bool ipa_table_hash_support(struct ipa *ipa)
+{
+	return ipa->version != IPA_VERSION_4_2;
+}
 
 /**
  * ipa_table_reset() - Reset filter and route tables entries to "none"
diff --git a/drivers/net/ipa/ipa_version.h b/drivers/net/ipa/ipa_version.h
index 2944e2a89023..ee2b3d02f3cd 100644
--- a/drivers/net/ipa/ipa_version.h
+++ b/drivers/net/ipa/ipa_version.h
@@ -8,17 +8,32 @@
 
 /**
  * enum ipa_version
+ * @IPA_VERSION_3_0:	IPA version 3.0/GSI version 1.0
+ * @IPA_VERSION_3_1:	IPA version 3.1/GSI version 1.1
+ * @IPA_VERSION_3_5:	IPA version 3.5/GSI version 1.2
+ * @IPA_VERSION_3_5_1:	IPA version 3.5.1/GSI version 1.3
+ * @IPA_VERSION_4_0:	IPA version 4.0/GSI version 2.0
+ * @IPA_VERSION_4_1:	IPA version 4.1/GSI version 2.0
+ * @IPA_VERSION_4_2:	IPA version 4.2/GSI version 2.2
+ * @IPA_VERSION_4_5:	IPA version 4.5/GSI version 2.5
+ * @IPA_VERSION_4_7:	IPA version 4.7/GSI version 2.7
+ * @IPA_VERSION_4_9:	IPA version 4.9/GSI version 2.9
+ * @IPA_VERSION_4_11:	IPA version 4.11/GSI version 2.11 (2.1.1)
  *
  * Defines the version of IPA (and GSI) hardware present on the platform.
- * It seems this might be better defined elsewhere, but having it here gets
- * it where it's needed.
  */
 enum ipa_version {
-	IPA_VERSION_3_5_1,	/* GSI version 1.3.0 */
-	IPA_VERSION_4_0,	/* GSI version 2.0 */
-	IPA_VERSION_4_1,	/* GSI version 2.1 */
-	IPA_VERSION_4_2,	/* GSI version 2.2 */
-	IPA_VERSION_4_5,	/* GSI version 2.5 */
+	IPA_VERSION_3_0,
+	IPA_VERSION_3_1,
+	IPA_VERSION_3_5,
+	IPA_VERSION_3_5_1,
+	IPA_VERSION_4_0,
+	IPA_VERSION_4_1,
+	IPA_VERSION_4_2,
+	IPA_VERSION_4_5,
+	IPA_VERSION_4_7,
+	IPA_VERSION_4_9,
+	IPA_VERSION_4_11,
 };
 
 #endif /* _IPA_VERSION_H_ */
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index a10cc460d7cf..d06e06f5e31a 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -200,6 +200,17 @@ config MDIO_BUS_MUX_MESON_G12A
 	  the amlogic g12a SoC. The multiplexers connects either the external
 	  or the internal MDIO bus to the parent bus.
 
+config MDIO_BUS_MUX_BCM6368
+	tristate "Broadcom BCM6368 MDIO bus multiplexers"
+	depends on OF && OF_MDIO && (BMIPS_GENERIC || COMPILE_TEST)
+	select MDIO_BUS_MUX
+	default BMIPS_GENERIC
+	help
+	  This module provides a driver for MDIO bus multiplexers found in
+	  BCM6368 based Broadcom SoCs. This multiplexer connects one of several
+	  child MDIO bus to a parent bus. Buses could be internal as well as
+	  external and selection logic lies inside the same multiplexer.
+
 config MDIO_BUS_MUX_BCM_IPROC
 	tristate "Broadcom iProc based MDIO bus multiplexers"
 	depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index 5c498dde463f..c3ec0ef989df 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_MDIO_THUNDER)		+= mdio-thunder.o
 obj-$(CONFIG_MDIO_XGENE)		+= mdio-xgene.o
 
 obj-$(CONFIG_MDIO_BUS_MUX)		+= mdio-mux.o
+obj-$(CONFIG_MDIO_BUS_MUX_BCM6368)	+= mdio-mux-bcm6368.o
 obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)	+= mdio-mux-bcm-iproc.o
 obj-$(CONFIG_MDIO_BUS_MUX_GPIO)		+= mdio-mux-gpio.o
 obj-$(CONFIG_MDIO_BUS_MUX_MESON_G12A)	+= mdio-mux-meson-g12a.o
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index fbd36891ee64..5d171e7f118d 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -5,20 +5,18 @@
  * Copyright (C) 2014-2017 Broadcom
  */
 
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
-#include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_mdio.h>
-
+#include <linux/of_platform.h>
+#include <linux/phy.h>
 #include <linux/platform_data/mdio-bcm-unimac.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
 
 #define MDIO_CMD		0x00
 #define  MDIO_START_BUSY	(1 << 29)
diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index d3915f831854..0f457c436335 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -14,10 +14,10 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mdio-bitbang.h>
+#include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 
 #define MDIO_READ 2
 #define MDIO_WRITE 1
diff --git a/drivers/net/mdio/mdio-cavium.c b/drivers/net/mdio/mdio-cavium.c
index 1afd6fc1a351..95ce274c1be1 100644
--- a/drivers/net/mdio/mdio-cavium.c
+++ b/drivers/net/mdio/mdio-cavium.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/io.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 1b00235d7dc5..56c8f914f893 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -17,15 +17,15 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/mdio-gpio.h>
 #include <linux/mdio-bitbang.h>
 #include <linux/mdio-gpio.h>
-#include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/platform_data/mdio-gpio.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 struct mdio_gpio_info {
 	struct mdiobb_ctrl ctrl;
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 25c25ea6da66..9cd71d896963 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -3,10 +3,10 @@
 /* Copyright (c) 2020 Sartura Ltd. */
 
 #include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index 1bd18857e1c5..8fe8f0119fc1 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -7,12 +7,12 @@
 
 #include <linux/delay.h>
 #include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/regmap.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 /* MII address register definitions */
 #define MII_ADDR_REG_ADDR                       0x10
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 11f583fd4611..b36e5ea04ddf 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -6,14 +6,14 @@
  * Copyright (c) 2017 Microsemi Corporation
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define MSCC_MIIM_REG_STATUS		0x0
 #define		MSCC_MIIM_STATUS_STAT_PENDING	BIT(2)
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 42fb5f166136..641cfa41f492 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -3,14 +3,14 @@
  * Copyright 2016 Broadcom
  */
 #include <linux/clk.h>
-#include <linux/platform_device.h>
+#include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/iopoll.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/delay.h>
-#include <linux/iopoll.h>
+#include <linux/platform_device.h>
 
 #define MDIO_RATE_ADJ_EXT_OFFSET	0x000
 #define MDIO_RATE_ADJ_INT_OFFSET	0x004
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
new file mode 100644
index 000000000000..6dcbf987d61b
--- /dev/null
+++ b/drivers/net/mdio/mdio-mux-bcm6368.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Broadcom BCM6368 mdiomux bus controller driver
+ *
+ * Copyright (C) 2021 Álvaro Fernández Rojas <noltari@gmail.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mdio-mux.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+
+#define MDIOC_REG		0x0
+#define MDIOC_EXT_MASK		BIT(16)
+#define MDIOC_REG_SHIFT		20
+#define MDIOC_PHYID_SHIFT	25
+#define MDIOC_RD_MASK		BIT(30)
+#define MDIOC_WR_MASK		BIT(31)
+
+#define MDIOD_REG		0x4
+
+struct bcm6368_mdiomux_desc {
+	void *mux_handle;
+	void __iomem *base;
+	struct device *dev;
+	struct mii_bus *mii_bus;
+	int ext_phy;
+};
+
+static int bcm6368_mdiomux_read(struct mii_bus *bus, int phy_id, int loc)
+{
+	struct bcm6368_mdiomux_desc *md = bus->priv;
+	uint32_t reg;
+	int ret;
+
+	__raw_writel(0, md->base + MDIOC_REG);
+
+	reg = MDIOC_RD_MASK |
+	      (phy_id << MDIOC_PHYID_SHIFT) |
+	      (loc << MDIOC_REG_SHIFT);
+	if (md->ext_phy)
+		reg |= MDIOC_EXT_MASK;
+
+	__raw_writel(reg, md->base + MDIOC_REG);
+	udelay(50);
+	ret = __raw_readw(md->base + MDIOD_REG);
+
+	return ret;
+}
+
+static int bcm6368_mdiomux_write(struct mii_bus *bus, int phy_id, int loc,
+				 uint16_t val)
+{
+	struct bcm6368_mdiomux_desc *md = bus->priv;
+	uint32_t reg;
+
+	__raw_writel(0, md->base + MDIOC_REG);
+
+	reg = MDIOC_WR_MASK |
+	      (phy_id << MDIOC_PHYID_SHIFT) |
+	      (loc << MDIOC_REG_SHIFT);
+	if (md->ext_phy)
+		reg |= MDIOC_EXT_MASK;
+	reg |= val;
+
+	__raw_writel(reg, md->base + MDIOC_REG);
+	udelay(50);
+
+	return 0;
+}
+
+static int bcm6368_mdiomux_switch_fn(int current_child, int desired_child,
+				     void *data)
+{
+	struct bcm6368_mdiomux_desc *md = data;
+
+	md->ext_phy = desired_child;
+
+	return 0;
+}
+
+static int bcm6368_mdiomux_probe(struct platform_device *pdev)
+{
+	struct bcm6368_mdiomux_desc *md;
+	struct mii_bus *bus;
+	struct resource *res;
+	int rc;
+
+	md = devm_kzalloc(&pdev->dev, sizeof(*md), GFP_KERNEL);
+	if (!md)
+		return -ENOMEM;
+	md->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	/*
+	 * Just ioremap, as this MDIO block is usually integrated into an
+	 * Ethernet MAC controller register range
+	 */
+	md->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!md->base) {
+		dev_err(&pdev->dev, "failed to ioremap register\n");
+		return -ENOMEM;
+	}
+
+	md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
+	if (!md->mii_bus) {
+		dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
+		return ENOMEM;
+	}
+
+	bus = md->mii_bus;
+	bus->priv = md;
+	bus->name = "BCM6368 MDIO mux bus";
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
+	bus->parent = &pdev->dev;
+	bus->read = bcm6368_mdiomux_read;
+	bus->write = bcm6368_mdiomux_write;
+	bus->phy_mask = 0x3f;
+	bus->dev.of_node = pdev->dev.of_node;
+
+	rc = mdiobus_register(bus);
+	if (rc) {
+		dev_err(&pdev->dev, "mdiomux registration failed\n");
+		return rc;
+	}
+
+	platform_set_drvdata(pdev, md);
+
+	rc = mdio_mux_init(md->dev, md->dev->of_node,
+			   bcm6368_mdiomux_switch_fn, &md->mux_handle, md,
+			   md->mii_bus);
+	if (rc) {
+		dev_info(md->dev, "mdiomux initialization failed\n");
+		goto out_register;
+	}
+
+	dev_info(&pdev->dev, "Broadcom BCM6368 MDIO mux bus\n");
+
+	return 0;
+
+out_register:
+	mdiobus_unregister(bus);
+	return rc;
+}
+
+static int bcm6368_mdiomux_remove(struct platform_device *pdev)
+{
+	struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
+
+	mdio_mux_uninit(md->mux_handle);
+	mdiobus_unregister(md->mii_bus);
+
+	return 0;
+}
+
+static const struct of_device_id bcm6368_mdiomux_ids[] = {
+	{ .compatible = "brcm,bcm6368-mdio-mux", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, bcm6368_mdiomux_ids);
+
+static struct platform_driver bcm6368_mdiomux_driver = {
+	.driver = {
+		.name = "bcm6368-mdio-mux",
+		.of_match_table = bcm6368_mdiomux_ids,
+	},
+	.probe	= bcm6368_mdiomux_probe,
+	.remove	= bcm6368_mdiomux_remove,
+};
+module_platform_driver(bcm6368_mdiomux_driver);
+
+MODULE_AUTHOR("Álvaro Fernández Rojas <noltari@gmail.com>");
+MODULE_DESCRIPTION("BCM6368 mdiomux bus controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
index 10a758fdc9e6..3c7f16f06b45 100644
--- a/drivers/net/mdio/mdio-mux-gpio.c
+++ b/drivers/net/mdio/mdio-mux-gpio.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
 
 #define DRV_VERSION "1.1"
 #define DRV_DESCRIPTION "GPIO controlled MDIO bus multiplexer driver"
diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
index d1a8780e24d8..c02fb2a067ee 100644
--- a/drivers/net/mdio/mdio-mux-mmioreg.c
+++ b/drivers/net/mdio/mdio-mux-mmioreg.c
@@ -7,13 +7,13 @@
  * Copyright 2012 Freescale Semiconductor, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_mmioreg_state {
 	void *mux_handle;
diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
index d6564381aa3e..527acfc3c045 100644
--- a/drivers/net/mdio/mdio-mux-multiplexer.c
+++ b/drivers/net/mdio/mdio-mux-multiplexer.c
@@ -4,10 +4,10 @@
  * Copyright 2019 NXP
  */
 
-#include <linux/platform_device.h>
 #include <linux/mdio-mux.h>
 #include <linux/module.h>
 #include <linux/mux/consumer.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_multiplexer_state {
 	struct mux_control *muxc;
diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
index 6a1d3540210b..110e4ee85785 100644
--- a/drivers/net/mdio/mdio-mux.c
+++ b/drivers/net/mdio/mdio-mux.c
@@ -3,12 +3,12 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
-#include <linux/mdio-mux.h>
-#include <linux/of_mdio.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define DRV_DESCRIPTION "MDIO bus multiplexer driver"
 
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index d1e1009d51af..8ce99c4888e1 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2009-2015 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
-#include <linux/gfp.h>
 #include <linux/phy.h>
-#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
index 3d7eda99d34e..cb1761693b69 100644
--- a/drivers/net/mdio/mdio-thunder.c
+++ b/drivers/net/mdio/mdio-thunder.c
@@ -3,14 +3,14 @@
  * Copyright (C) 2009-2016 Cavium, Inc.
  */
 
-#include <linux/of_address.h>
-#include <linux/of_mdio.h>
-#include <linux/module.h>
+#include <linux/acpi.h>
 #include <linux/gfp.h>
-#include <linux/phy.h>
 #include <linux/io.h>
-#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
 #include <linux/pci.h>
+#include <linux/phy.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 461207cdf5d6..7ab4e26db08c 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -13,11 +13,11 @@
 #include <linux/io.h>
 #include <linux/mdio/mdio-xgene.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
 #include <linux/of_mdio.h>
-#include <linux/prefetch.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
 #include <linux/phy.h>
+#include <linux/prefetch.h>
 #include <net/ip.h>
 
 static bool xgene_mdio_status;
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index ea9d5855fb52..094494a68ddf 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -8,17 +8,17 @@
  * out of the OpenFirmware device tree and using it to populate an mii_bus.
  */
 
-#include <linux/kernel.h>
 #include <linux/device.h>
-#include <linux/netdevice.h>
 #include <linux/err.h>
-#include <linux/phy.h>
-#include <linux/phy_fixed.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
 
 #define DEFAULT_GPIO_RESET_DELAY	10	/* in microseconds */
 
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index ade086eed955..a1cbfa44a1e1 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -13,3 +13,7 @@ endif
 ifneq ($(CONFIG_XFRM_OFFLOAD),)
 netdevsim-objs += ipsec.o
 endif
+
+ifneq ($(CONFIG_PSAMPLE),)
+netdevsim-objs += psample.o
+endif
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index dbeb29fa16e8..6189a4c0d39e 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1032,10 +1032,14 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 	if (err)
 		goto err_fib_destroy;
 
-	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	err = nsim_dev_psample_init(nsim_dev);
 	if (err)
 		goto err_health_exit;
 
+	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	if (err)
+		goto err_psample_exit;
+
 	nsim_dev->take_snapshot = debugfs_create_file("take_snapshot",
 						      0200,
 						      nsim_dev->ddir,
@@ -1043,6 +1047,8 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 						&nsim_dev_take_snapshot_fops);
 	return 0;
 
+err_psample_exit:
+	nsim_dev_psample_exit(nsim_dev);
 err_health_exit:
 	nsim_dev_health_exit(nsim_dev);
 err_fib_destroy:
@@ -1118,14 +1124,20 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
 	if (err)
 		goto err_health_exit;
 
-	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	err = nsim_dev_psample_init(nsim_dev);
 	if (err)
 		goto err_bpf_dev_exit;
 
+	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	if (err)
+		goto err_psample_exit;
+
 	devlink_params_publish(devlink);
 	devlink_reload_enable(devlink);
 	return 0;
 
+err_psample_exit:
+	nsim_dev_psample_exit(nsim_dev);
 err_bpf_dev_exit:
 	nsim_bpf_dev_exit(nsim_dev);
 err_health_exit:
@@ -1158,6 +1170,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
 		return;
 	debugfs_remove(nsim_dev->take_snapshot);
 	nsim_dev_port_del_all(nsim_dev);
+	nsim_dev_psample_exit(nsim_dev);
 	nsim_dev_health_exit(nsim_dev);
 	nsim_fib_destroy(devlink, nsim_dev->fib_data);
 	nsim_dev_traps_exit(devlink);
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 46fb414f7ca6..fda6f37e7055 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -14,6 +14,7 @@
  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
  */
 
+#include <linux/bitmap.h>
 #include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -47,15 +48,18 @@ struct nsim_fib_data {
 	struct nsim_fib_entry nexthops;
 	struct rhashtable fib_rt_ht;
 	struct list_head fib_rt_list;
-	struct mutex fib_lock; /* Protects hashtable and list */
+	struct mutex fib_lock; /* Protects FIB HT and list */
 	struct notifier_block nexthop_nb;
 	struct rhashtable nexthop_ht;
 	struct devlink *devlink;
 	struct work_struct fib_event_work;
 	struct list_head fib_event_queue;
 	spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
+	struct mutex nh_lock; /* Protects NH HT */
 	struct dentry *ddir;
 	bool fail_route_offload;
+	bool fail_res_nexthop_group_replace;
+	bool fail_nexthop_bucket_replace;
 };
 
 struct nsim_fib_rt_key {
@@ -116,6 +120,7 @@ struct nsim_nexthop {
 	struct rhash_head ht_node;
 	u64 occ;
 	u32 id;
+	bool is_resilient;
 };
 
 static const struct rhashtable_params nsim_nexthop_ht_params = {
@@ -869,10 +874,8 @@ err_rt_offload_failed_flag_set:
 	return err;
 }
 
-static int nsim_fib_event(struct nsim_fib_event *fib_event)
+static void nsim_fib_event(struct nsim_fib_event *fib_event)
 {
-	int err = 0;
-
 	switch (fib_event->family) {
 	case AF_INET:
 		nsim_fib4_event(fib_event->data, &fib_event->fen_info,
@@ -885,8 +888,6 @@ static int nsim_fib_event(struct nsim_fib_event *fib_event)
 		nsim_fib6_event_fini(&fib_event->fib6_event);
 		break;
 	}
-
-	return err;
 }
 
 static int nsim_fib4_prepare_event(struct fib_notifier_info *info,
@@ -1118,6 +1119,10 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data,
 		for (i = 0; i < info->nh_grp->num_nh; i++)
 			occ += info->nh_grp->nh_entries[i].weight;
 		break;
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		occ = info->nh_res_table->num_nh_buckets;
+		nexthop->is_resilient = true;
+		break;
 	default:
 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
 		kfree(nexthop);
@@ -1160,6 +1165,21 @@ err_num_decrease:
 
 }
 
+static void nsim_nexthop_hw_flags_set(struct net *net,
+				      const struct nsim_nexthop *nexthop,
+				      bool trap)
+{
+	int i;
+
+	nexthop_set_hw_flags(net, nexthop->id, false, trap);
+
+	if (!nexthop->is_resilient)
+		return;
+
+	for (i = 0; i < nexthop->occ; i++)
+		nexthop_bucket_set_hw_flags(net, nexthop->id, i, false, trap);
+}
+
 static int nsim_nexthop_add(struct nsim_fib_data *data,
 			    struct nsim_nexthop *nexthop,
 			    struct netlink_ext_ack *extack)
@@ -1178,7 +1198,7 @@ static int nsim_nexthop_add(struct nsim_fib_data *data,
 		goto err_nexthop_dismiss;
 	}
 
-	nexthop_set_hw_flags(net, nexthop->id, false, true);
+	nsim_nexthop_hw_flags_set(net, nexthop, true);
 
 	return 0;
 
@@ -1207,7 +1227,7 @@ static int nsim_nexthop_replace(struct nsim_fib_data *data,
 		goto err_nexthop_dismiss;
 	}
 
-	nexthop_set_hw_flags(net, nexthop->id, false, true);
+	nsim_nexthop_hw_flags_set(net, nexthop, true);
 	nsim_nexthop_account(data, nexthop_old->occ, false, extack);
 	nsim_nexthop_destroy(nexthop_old);
 
@@ -1258,6 +1278,32 @@ static void nsim_nexthop_remove(struct nsim_fib_data *data,
 	nsim_nexthop_destroy(nexthop);
 }
 
+static int nsim_nexthop_res_table_pre_replace(struct nsim_fib_data *data,
+					      struct nh_notifier_info *info)
+{
+	if (data->fail_res_nexthop_group_replace) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace a resilient nexthop group");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nsim_nexthop_bucket_replace(struct nsim_fib_data *data,
+				       struct nh_notifier_info *info)
+{
+	if (data->fail_nexthop_bucket_replace) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace nexthop bucket");
+		return -EINVAL;
+	}
+
+	nexthop_bucket_set_hw_flags(info->net, info->id,
+				    info->nh_res_bucket->bucket_index,
+				    false, true);
+
+	return 0;
+}
+
 static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 				 void *ptr)
 {
@@ -1266,8 +1312,7 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 	struct nh_notifier_info *info = ptr;
 	int err = 0;
 
-	ASSERT_RTNL();
-
+	mutex_lock(&data->nh_lock);
 	switch (event) {
 	case NEXTHOP_EVENT_REPLACE:
 		err = nsim_nexthop_insert(data, info);
@@ -1275,10 +1320,17 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 	case NEXTHOP_EVENT_DEL:
 		nsim_nexthop_remove(data, info);
 		break;
+	case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
+		err = nsim_nexthop_res_table_pre_replace(data, info);
+		break;
+	case NEXTHOP_EVENT_BUCKET_REPLACE:
+		err = nsim_nexthop_bucket_replace(data, info);
+		break;
 	default:
 		break;
 	}
 
+	mutex_unlock(&data->nh_lock);
 	return notifier_from_errno(err);
 }
 
@@ -1289,11 +1341,68 @@ static void nsim_nexthop_free(void *ptr, void *arg)
 	struct net *net;
 
 	net = devlink_net(data->devlink);
-	nexthop_set_hw_flags(net, nexthop->id, false, false);
+	nsim_nexthop_hw_flags_set(net, nexthop, false);
 	nsim_nexthop_account(data, nexthop->occ, false, NULL);
 	nsim_nexthop_destroy(nexthop);
 }
 
+static ssize_t nsim_nexthop_bucket_activity_write(struct file *file,
+						  const char __user *user_buf,
+						  size_t size, loff_t *ppos)
+{
+	struct nsim_fib_data *data = file->private_data;
+	struct net *net = devlink_net(data->devlink);
+	struct nsim_nexthop *nexthop;
+	unsigned long *activity;
+	loff_t pos = *ppos;
+	u16 bucket_index;
+	char buf[128];
+	int err = 0;
+	u32 nhid;
+
+	if (pos != 0)
+		return -EINVAL;
+	if (size > sizeof(buf))
+		return -EINVAL;
+	if (copy_from_user(buf, user_buf, size))
+		return -EFAULT;
+	if (sscanf(buf, "%u %hu", &nhid, &bucket_index) != 2)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &nhid,
+					 nsim_nexthop_ht_params);
+	if (!nexthop || !nexthop->is_resilient ||
+	    bucket_index >= nexthop->occ) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	activity = bitmap_zalloc(nexthop->occ, GFP_KERNEL);
+	if (!activity) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bitmap_set(activity, bucket_index, 1);
+	nexthop_res_grp_activity_update(net, nhid, nexthop->occ, activity);
+	bitmap_free(activity);
+
+out:
+	rtnl_unlock();
+
+	*ppos = size;
+	return err ?: size;
+}
+
+static const struct file_operations nsim_nexthop_bucket_activity_fops = {
+	.open = simple_open,
+	.write = nsim_nexthop_bucket_activity_write,
+	.llseek = no_llseek,
+	.owner = THIS_MODULE,
+};
+
 static u64 nsim_fib_ipv4_resource_occ_get(void *priv)
 {
 	struct nsim_fib_data *data = priv;
@@ -1383,6 +1492,17 @@ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
 	data->fail_route_offload = false;
 	debugfs_create_bool("fail_route_offload", 0600, data->ddir,
 			    &data->fail_route_offload);
+
+	data->fail_res_nexthop_group_replace = false;
+	debugfs_create_bool("fail_res_nexthop_group_replace", 0600, data->ddir,
+			    &data->fail_res_nexthop_group_replace);
+
+	data->fail_nexthop_bucket_replace = false;
+	debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir,
+			    &data->fail_nexthop_bucket_replace);
+
+	debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir,
+			    data, &nsim_nexthop_bucket_activity_fops);
 	return 0;
 }
 
@@ -1408,6 +1528,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
 	if (err)
 		goto err_data_free;
 
+	mutex_init(&data->nh_lock);
 	err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
 	if (err)
 		goto err_debugfs_exit;
@@ -1473,6 +1594,7 @@ err_rhashtable_nexthop_destroy:
 				    data);
 	mutex_destroy(&data->fib_lock);
 err_debugfs_exit:
+	mutex_destroy(&data->nh_lock);
 	nsim_fib_debugfs_exit(data);
 err_data_free:
 	kfree(data);
@@ -1501,6 +1623,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
 	WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
 	WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
 	mutex_destroy(&data->fib_lock);
+	mutex_destroy(&data->nh_lock);
 	nsim_fib_debugfs_exit(data);
 	kfree(data);
 }
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 21e2974660e7..04aebdf85747 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -235,15 +235,10 @@ static ssize_t nsim_dev_health_break_write(struct file *file,
 	char *break_msg;
 	int err;
 
-	break_msg = kmalloc(count + 1, GFP_KERNEL);
-	if (!break_msg)
-		return -ENOMEM;
+	break_msg = memdup_user_nul(data, count);
+	if (IS_ERR(break_msg))
+		return PTR_ERR(break_msg);
 
-	if (copy_from_user(break_msg, data, count)) {
-		err = -EFAULT;
-		goto out;
-	}
-	break_msg[count] = '\0';
 	if (break_msg[count - 1] == '\n')
 		break_msg[count - 1] = '\0';
 
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 48163c5f2ec9..d735c21def4b 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -180,6 +180,20 @@ struct nsim_dev_health {
 int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink);
 void nsim_dev_health_exit(struct nsim_dev *nsim_dev);
 
+#if IS_ENABLED(CONFIG_PSAMPLE)
+int nsim_dev_psample_init(struct nsim_dev *nsim_dev);
+void nsim_dev_psample_exit(struct nsim_dev *nsim_dev);
+#else
+static inline int nsim_dev_psample_init(struct nsim_dev *nsim_dev)
+{
+	return 0;
+}
+
+static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev)
+{
+}
+#endif
+
 struct nsim_dev_port {
 	struct list_head list;
 	struct devlink_port devlink_port;
@@ -229,6 +243,7 @@ struct nsim_dev {
 		bool static_iana_vxlan;
 		u32 sleep;
 	} udp_ports;
+	struct nsim_dev_psample *psample;
 };
 
 static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev)
diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c
new file mode 100644
index 000000000000..5ec3bd7f891b
--- /dev/null
+++ b/drivers/net/netdevsim/psample.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Mellanox Technologies. All rights reserved */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/etherdevice.h>
+#include <linux/inet.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <net/devlink.h>
+#include <net/ip.h>
+#include <net/psample.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/udp.h>
+
+#include "netdevsim.h"
+
+#define NSIM_PSAMPLE_REPORT_INTERVAL_MS	100
+#define NSIM_PSAMPLE_INVALID_TC		0xFFFF
+#define NSIM_PSAMPLE_L4_DATA_LEN	100
+
+struct nsim_dev_psample {
+	struct delayed_work psample_dw;
+	struct dentry *ddir;
+	struct psample_group *group;
+	u32 rate;
+	u32 group_num;
+	u32 trunc_size;
+	int in_ifindex;
+	int out_ifindex;
+	u16 out_tc;
+	u64 out_tc_occ_max;
+	u64 latency_max;
+	bool is_active;
+};
+
+static struct sk_buff *nsim_dev_psample_skb_build(void)
+{
+	int tot_len, data_len = NSIM_PSAMPLE_L4_DATA_LEN;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+	tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
+
+	skb_reset_mac_header(skb);
+	eth = skb_put(skb, sizeof(struct ethhdr));
+	eth_random_addr(eth->h_dest);
+	eth_random_addr(eth->h_source);
+	eth->h_proto = htons(ETH_P_IP);
+	skb->protocol = htons(ETH_P_IP);
+
+	skb_set_network_header(skb, skb->len);
+	iph = skb_put(skb, sizeof(struct iphdr));
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = in_aton("192.0.2.1");
+	iph->daddr = in_aton("198.51.100.1");
+	iph->version = 0x4;
+	iph->frag_off = 0;
+	iph->ihl = 0x5;
+	iph->tot_len = htons(tot_len);
+	iph->id = 0;
+	iph->ttl = 100;
+	iph->check = 0;
+	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	skb_set_transport_header(skb, skb->len);
+	udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
+	get_random_bytes(&udph->source, sizeof(u16));
+	get_random_bytes(&udph->dest, sizeof(u16));
+	udph->len = htons(sizeof(struct udphdr) + data_len);
+
+	return skb;
+}
+
+static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample,
+					struct psample_metadata *md)
+{
+	md->trunc_size = psample->trunc_size;
+	md->in_ifindex = psample->in_ifindex;
+	md->out_ifindex = psample->out_ifindex;
+
+	if (psample->out_tc != NSIM_PSAMPLE_INVALID_TC) {
+		md->out_tc = psample->out_tc;
+		md->out_tc_valid = 1;
+	}
+
+	if (psample->out_tc_occ_max) {
+		u64 out_tc_occ;
+
+		get_random_bytes(&out_tc_occ, sizeof(u64));
+		md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1);
+		md->out_tc_occ_valid = 1;
+	}
+
+	if (psample->latency_max) {
+		u64 latency;
+
+		get_random_bytes(&latency, sizeof(u64));
+		md->latency = latency & (psample->latency_max - 1);
+		md->latency_valid = 1;
+	}
+}
+
+static void nsim_dev_psample_report_work(struct work_struct *work)
+{
+	struct nsim_dev_psample *psample;
+	struct psample_metadata md = {};
+	struct sk_buff *skb;
+	unsigned long delay;
+
+	psample = container_of(work, struct nsim_dev_psample, psample_dw.work);
+
+	skb = nsim_dev_psample_skb_build();
+	if (!skb)
+		goto out;
+
+	nsim_dev_psample_md_prepare(psample, &md);
+	psample_sample_packet(psample->group, skb, psample->rate, &md);
+	consume_skb(skb);
+
+out:
+	delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS);
+	schedule_delayed_work(&psample->psample_dw, delay);
+}
+
+static int nsim_dev_psample_enable(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample = nsim_dev->psample;
+	struct devlink *devlink;
+	unsigned long delay;
+
+	if (psample->is_active)
+		return -EBUSY;
+
+	devlink = priv_to_devlink(nsim_dev);
+	psample->group = psample_group_get(devlink_net(devlink),
+					   psample->group_num);
+	if (!psample->group)
+		return -EINVAL;
+
+	delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS);
+	schedule_delayed_work(&psample->psample_dw, delay);
+
+	psample->is_active = true;
+
+	return 0;
+}
+
+static int nsim_dev_psample_disable(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample = nsim_dev->psample;
+
+	if (!psample->is_active)
+		return -EINVAL;
+
+	psample->is_active = false;
+
+	cancel_delayed_work_sync(&psample->psample_dw);
+	psample_group_put(psample->group);
+
+	return 0;
+}
+
+static ssize_t nsim_dev_psample_enable_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct nsim_dev *nsim_dev = file->private_data;
+	bool enable;
+	int err;
+
+	err = kstrtobool_from_user(data, count, &enable);
+	if (err)
+		return err;
+
+	if (enable)
+		err = nsim_dev_psample_enable(nsim_dev);
+	else
+		err = nsim_dev_psample_disable(nsim_dev);
+
+	return err ? err : count;
+}
+
+static const struct file_operations nsim_psample_enable_fops = {
+	.open = simple_open,
+	.write = nsim_dev_psample_enable_write,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
+int nsim_dev_psample_init(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample;
+	int err;
+
+	psample = kzalloc(sizeof(*psample), GFP_KERNEL);
+	if (!psample)
+		return -ENOMEM;
+	nsim_dev->psample = psample;
+
+	INIT_DELAYED_WORK(&psample->psample_dw, nsim_dev_psample_report_work);
+
+	psample->ddir = debugfs_create_dir("psample", nsim_dev->ddir);
+	if (IS_ERR(psample->ddir)) {
+		err = PTR_ERR(psample->ddir);
+		goto err_psample_free;
+	}
+
+	/* Populate sampling parameters with sane defaults. */
+	psample->rate = 100;
+	debugfs_create_u32("rate", 0600, psample->ddir, &psample->rate);
+
+	psample->group_num = 10;
+	debugfs_create_u32("group_num", 0600, psample->ddir,
+			   &psample->group_num);
+
+	psample->trunc_size = 0;
+	debugfs_create_u32("trunc_size", 0600, psample->ddir,
+			   &psample->trunc_size);
+
+	psample->in_ifindex = 1;
+	debugfs_create_u32("in_ifindex", 0600, psample->ddir,
+			   &psample->in_ifindex);
+
+	psample->out_ifindex = 2;
+	debugfs_create_u32("out_ifindex", 0600, psample->ddir,
+			   &psample->out_ifindex);
+
+	psample->out_tc = 0;
+	debugfs_create_u16("out_tc", 0600, psample->ddir, &psample->out_tc);
+
+	psample->out_tc_occ_max = 10000;
+	debugfs_create_u64("out_tc_occ_max", 0600, psample->ddir,
+			   &psample->out_tc_occ_max);
+
+	psample->latency_max = 50;
+	debugfs_create_u64("latency_max", 0600, psample->ddir,
+			   &psample->latency_max);
+
+	debugfs_create_file("enable", 0200, psample->ddir, nsim_dev,
+			    &nsim_psample_enable_fops);
+
+	return 0;
+
+err_psample_free:
+	kfree(nsim_dev->psample);
+	return err;
+}
+
+void nsim_dev_psample_exit(struct nsim_dev *nsim_dev)
+{
+	debugfs_remove_recursive(nsim_dev->psample->ddir);
+	if (nsim_dev->psample->is_active) {
+		cancel_delayed_work_sync(&nsim_dev->psample->psample_dw);
+		psample_group_put(nsim_dev->psample->group);
+	}
+	kfree(nsim_dev->psample);
+}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 1aa9903d602e..944ba105cac1 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -15,6 +15,7 @@
 #define SYNOPSYS_XPCS_USXGMII_ID	0x7996ced0
 #define SYNOPSYS_XPCS_10GKR_ID		0x7996ced0
 #define SYNOPSYS_XPCS_XLGMII_ID		0x7996ced0
+#define SYNOPSYS_XPCS_SGMII_ID		0x7996ced0
 #define SYNOPSYS_XPCS_MASK		0xffffffff
 
 /* Vendor regs access */
@@ -57,6 +58,34 @@
 #define DW_C73_2500KX			BIT(0)
 #define DW_C73_5000KR			BIT(1)
 
+/* Clause 37 Defines */
+/* VR MII MMD registers offsets */
+#define DW_VR_MII_DIG_CTRL1		0x8000
+#define DW_VR_MII_AN_CTRL		0x8001
+#define DW_VR_MII_AN_INTR_STS		0x8002
+
+/* VR_MII_DIG_CTRL1 */
+#define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
+
+/* VR_MII_AN_CTRL */
+#define DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT	3
+#define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
+#define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
+#define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
+#define DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT	1
+#define DW_VR_MII_PCS_MODE_MASK			GENMASK(2, 1)
+#define DW_VR_MII_PCS_MODE_C37_1000BASEX	0x0
+#define DW_VR_MII_PCS_MODE_C37_SGMII		0x2
+
+/* VR_MII_AN_INTR_STS */
+#define DW_VR_MII_AN_STS_C37_ANSGM_FD		BIT(1)
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT	2
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP		GENMASK(3, 2)
+#define DW_VR_MII_C37_ANSGM_SP_10		0x0
+#define DW_VR_MII_C37_ANSGM_SP_100		0x1
+#define DW_VR_MII_C37_ANSGM_SP_1000		0x2
+#define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
+
 static const int xpcs_usxgmii_features[] = {
 	ETHTOOL_LINK_MODE_Pause_BIT,
 	ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -105,6 +134,16 @@ static const int xpcs_xlgmii_features[] = {
 	__ETHTOOL_LINK_MODE_MASK_NBITS,
 };
 
+static const int xpcs_sgmii_features[] = {
+	ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+	__ETHTOOL_LINK_MODE_MASK_NBITS,
+};
+
 static const phy_interface_t xpcs_usxgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_USXGMII,
 	PHY_INTERFACE_MODE_MAX,
@@ -120,27 +159,42 @@ static const phy_interface_t xpcs_xlgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_MAX,
 };
 
+static const phy_interface_t xpcs_sgmii_interfaces[] = {
+	PHY_INTERFACE_MODE_SGMII,
+	PHY_INTERFACE_MODE_MAX,
+};
+
 static struct xpcs_id {
 	u32 id;
 	u32 mask;
 	const int *supported;
 	const phy_interface_t *interface;
+	int an_mode;
 } xpcs_id_list[] = {
 	{
 		.id = SYNOPSYS_XPCS_USXGMII_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_usxgmii_features,
 		.interface = xpcs_usxgmii_interfaces,
+		.an_mode = DW_AN_C73,
 	}, {
 		.id = SYNOPSYS_XPCS_10GKR_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_10gkr_features,
 		.interface = xpcs_10gkr_interfaces,
+		.an_mode = DW_AN_C73,
 	}, {
 		.id = SYNOPSYS_XPCS_XLGMII_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_xlgmii_features,
 		.interface = xpcs_xlgmii_interfaces,
+		.an_mode = DW_AN_C73,
+	}, {
+		.id = SYNOPSYS_XPCS_SGMII_ID,
+		.mask = SYNOPSYS_XPCS_MASK,
+		.supported = xpcs_sgmii_features,
+		.interface = xpcs_sgmii_interfaces,
+		.an_mode = DW_AN_C37_SGMII,
 	},
 };
 
@@ -195,9 +249,20 @@ static int xpcs_poll_reset(struct mdio_xpcs_args *xpcs, int dev)
 	return (ret & MDIO_CTRL1_RESET) ? -ETIMEDOUT : 0;
 }
 
-static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs, int dev)
+static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs)
 {
-	int ret;
+	int ret, dev;
+
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		dev = MDIO_MMD_PCS;
+		break;
+	case DW_AN_C37_SGMII:
+		dev = MDIO_MMD_VEND2;
+		break;
+	default:
+		return -1;
+	}
 
 	ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET);
 	if (ret < 0)
@@ -212,8 +277,8 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs, int dev)
 		dev_warn(&(__xpcs)->bus->dev, ##__args); \
 })
 
-static int xpcs_read_fault(struct mdio_xpcs_args *xpcs,
-			   struct phylink_link_state *state)
+static int xpcs_read_fault_c73(struct mdio_xpcs_args *xpcs,
+			       struct phylink_link_state *state)
 {
 	int ret;
 
@@ -263,7 +328,7 @@ static int xpcs_read_fault(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_link(struct mdio_xpcs_args *xpcs, bool an)
+static int xpcs_read_link_c73(struct mdio_xpcs_args *xpcs, bool an)
 {
 	bool link = true;
 	int ret;
@@ -357,7 +422,7 @@ static int xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
 	return xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_RST);
 }
 
-static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
+static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 {
 	int ret, adv;
 
@@ -401,11 +466,11 @@ static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_AN, DW_SR_AN_ADV1, adv);
 }
 
-static int xpcs_config_aneg(struct mdio_xpcs_args *xpcs)
+static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 {
 	int ret;
 
-	ret = xpcs_config_aneg_c73(xpcs);
+	ret = _xpcs_config_aneg_c73(xpcs);
 	if (ret < 0)
 		return ret;
 
@@ -418,8 +483,8 @@ static int xpcs_config_aneg(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_AN, MDIO_CTRL1, ret);
 }
 
-static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
-			  struct phylink_link_state *state)
+static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
+			      struct phylink_link_state *state)
 {
 	int ret;
 
@@ -434,7 +499,7 @@ static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
 
 		/* Check if Aneg outcome is valid */
 		if (!(ret & DW_C73_AN_ADV_SF)) {
-			xpcs_config_aneg(xpcs);
+			xpcs_config_aneg_c73(xpcs);
 			return 0;
 		}
 
@@ -444,8 +509,8 @@ static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_lpa(struct mdio_xpcs_args *xpcs,
-			 struct phylink_link_state *state)
+static int xpcs_read_lpa_c73(struct mdio_xpcs_args *xpcs,
+			     struct phylink_link_state *state)
 {
 	int ret;
 
@@ -493,8 +558,8 @@ static int xpcs_read_lpa(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static void xpcs_resolve_lpa(struct mdio_xpcs_args *xpcs,
-			     struct phylink_link_state *state)
+static void xpcs_resolve_lpa_c73(struct mdio_xpcs_args *xpcs,
+				 struct phylink_link_state *state)
 {
 	int max_speed = xpcs_get_max_usxgmii_speed(state->lp_advertising);
 
@@ -585,32 +650,84 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
+{
+	int ret;
+
+	/* For AN for C37 SGMII mode, the settings are :-
+	 * 1) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 10b (SGMII AN)
+	 * 2) VR_MII_AN_CTRL Bit(3) [TX_CONFIG] = 0b (MAC side SGMII)
+	 *    DW xPCS used with DW EQoS MAC is always MAC side SGMII.
+	 * 3) VR_MII_DIG_CTRL1 Bit(9) [MAC_AUTO_SW] = 1b (Automatic
+	 *    speed/duplex mode change by HW after SGMII AN complete)
+	 *
+	 * Note: Since it is MAC side SGMII, there is no need to set
+	 *	 SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from
+	 *	 PHY about the link state change after C28 AN is completed
+	 *	 between PHY and Link Partner. There is also no need to
+	 *	 trigger AN restart for MAC-side SGMII.
+	 */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~(DW_VR_MII_PCS_MODE_MASK | DW_VR_MII_TX_CONFIG_MASK);
+	ret |= (DW_VR_MII_PCS_MODE_C37_SGMII <<
+		DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT &
+		DW_VR_MII_PCS_MODE_MASK);
+	ret |= (DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII <<
+		DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT &
+		DW_VR_MII_TX_CONFIG_MASK);
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, ret);
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret |= DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+
+	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
+}
+
 static int xpcs_config(struct mdio_xpcs_args *xpcs,
 		       const struct phylink_link_state *state)
 {
 	int ret;
 
-	if (state->an_enabled) {
-		ret = xpcs_config_aneg(xpcs);
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		if (state->an_enabled) {
+			ret = xpcs_config_aneg_c73(xpcs);
+			if (ret)
+				return ret;
+		}
+		break;
+	case DW_AN_C37_SGMII:
+		ret = xpcs_config_aneg_c37_sgmii(xpcs);
 		if (ret)
 			return ret;
+		break;
+	default:
+		return -1;
 	}
 
 	return 0;
 }
 
-static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
-			  struct phylink_link_state *state)
+static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
+			      struct phylink_link_state *state)
 {
 	int ret;
 
 	/* Link needs to be read first ... */
-	state->link = xpcs_read_link(xpcs, state->an_enabled) > 0 ? 1 : 0;
+	state->link = xpcs_read_link_c73(xpcs, state->an_enabled) > 0 ? 1 : 0;
 
 	/* ... and then we check the faults. */
-	ret = xpcs_read_fault(xpcs, state);
+	ret = xpcs_read_fault_c73(xpcs, state);
 	if (ret) {
-		ret = xpcs_soft_reset(xpcs, MDIO_MMD_PCS);
+		ret = xpcs_soft_reset(xpcs);
 		if (ret)
 			return ret;
 
@@ -619,10 +736,10 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 		return xpcs_config(xpcs, state);
 	}
 
-	if (state->an_enabled && xpcs_aneg_done(xpcs, state)) {
+	if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state)) {
 		state->an_complete = true;
-		xpcs_read_lpa(xpcs, state);
-		xpcs_resolve_lpa(xpcs, state);
+		xpcs_read_lpa_c73(xpcs, state);
+		xpcs_resolve_lpa_c73(xpcs, state);
 	} else if (state->an_enabled) {
 		state->link = 0;
 	} else if (state->link) {
@@ -632,6 +749,70 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
+				    struct phylink_link_state *state)
+{
+	int ret;
+
+	/* Reset link_state */
+	state->link = false;
+	state->speed = SPEED_UNKNOWN;
+	state->duplex = DUPLEX_UNKNOWN;
+	state->pause = 0;
+
+	/* For C37 SGMII mode, we check DW_VR_MII_AN_INTR_STS for link
+	 * status, speed and duplex.
+	 */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+	if (ret < 0)
+		return false;
+
+	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+		int speed_value;
+
+		state->link = true;
+
+		speed_value = (ret & DW_VR_MII_AN_STS_C37_ANSGM_SP) >>
+			      DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT;
+		if (speed_value == DW_VR_MII_C37_ANSGM_SP_1000)
+			state->speed = SPEED_1000;
+		else if (speed_value == DW_VR_MII_C37_ANSGM_SP_100)
+			state->speed = SPEED_100;
+		else
+			state->speed = SPEED_10;
+
+		if (ret & DW_VR_MII_AN_STS_C37_ANSGM_FD)
+			state->duplex = DUPLEX_FULL;
+		else
+			state->duplex = DUPLEX_HALF;
+	}
+
+	return 0;
+}
+
+static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
+			  struct phylink_link_state *state)
+{
+	int ret;
+
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		ret = xpcs_get_state_c73(xpcs, state);
+		if (ret)
+			return ret;
+		break;
+	case DW_AN_C37_SGMII:
+		ret = xpcs_get_state_c37_sgmii(xpcs, state);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
 static int xpcs_link_up(struct mdio_xpcs_args *xpcs, int speed,
 			phy_interface_t interface)
 {
@@ -646,6 +827,7 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 	int ret;
 	u32 id;
 
+	/* First, search C73 PCS using PCS MMD */
 	ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID1);
 	if (ret < 0)
 		return 0xffffffff;
@@ -656,7 +838,26 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 	if (ret < 0)
 		return 0xffffffff;
 
-	return id | ret;
+	/* If Device IDs are not all zeros, we found C73 AN-type device */
+	if (id | ret)
+		return id | ret;
+
+	/* Next, search C37 PCS using Vendor-Specific MII MMD */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1);
+	if (ret < 0)
+		return 0xffffffff;
+
+	id = ret << 16;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2);
+	if (ret < 0)
+		return 0xffffffff;
+
+	/* If Device IDs are not all zeros, we found C37 AN-type device */
+	if (id | ret)
+		return id | ret;
+
+	return 0xffffffff;
 }
 
 static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
@@ -676,6 +877,8 @@ static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
 	for (i = 0; match->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
 		set_bit(match->supported[i], xpcs->supported);
 
+	xpcs->an_mode = match->an_mode;
+
 	return true;
 }
 
@@ -692,7 +895,7 @@ static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 			match = entry;
 
 			if (xpcs_check_features(xpcs, match, interface))
-				return xpcs_soft_reset(xpcs, MDIO_MMD_PCS);
+				return xpcs_soft_reset(xpcs);
 		}
 	}
 
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 698bea312adc..a615b3660b05 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -201,6 +201,12 @@ config MARVELL_10G_PHY
 	help
 	  Support for the Marvell Alaska MV88X3310 and compatible PHYs.
 
+config MARVELL_88X2222_PHY
+	tristate "Marvell 88X2222 PHY"
+	help
+	  Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet
+	  Transceiver.
+
 config MICREL_PHY
 	tristate "Micrel PHYs"
 	help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index a13e402074cf..de683e3abe63 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
 obj-$(CONFIG_LXT_PHY)		+= lxt.o
 obj-$(CONFIG_MARVELL_10G_PHY)	+= marvell10g.o
 obj-$(CONFIG_MARVELL_PHY)	+= marvell.o
+obj-$(CONFIG_MARVELL_88X2222_PHY)	+= marvell-88x2222.o
 obj-$(CONFIG_MESON_GXL_PHY)	+= meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)	+= spi_ks8995.o
 obj-$(CONFIG_MICREL_PHY)	+= micrel.o
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c2aa4c92edde..d7799beb811c 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -751,36 +751,6 @@ static void at803x_link_change_notify(struct phy_device *phydev)
 	}
 }
 
-static int at803x_aneg_done(struct phy_device *phydev)
-{
-	int ccr;
-
-	int aneg_done = genphy_aneg_done(phydev);
-	if (aneg_done != BMSR_ANEGCOMPLETE)
-		return aneg_done;
-
-	/*
-	 * in SGMII mode, if copper side autoneg is successful,
-	 * also check SGMII side autoneg result
-	 */
-	ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
-	if ((ccr & AT803X_MODE_CFG_MASK) != AT803X_MODE_CFG_SGMII)
-		return aneg_done;
-
-	/* switch to SGMII/fiber page */
-	phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL);
-
-	/* check if the SGMII link is OK. */
-	if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) {
-		phydev_warn(phydev, "803x_aneg_done: SGMII link is not ok\n");
-		aneg_done = 0;
-	}
-	/* switch back to copper page */
-	phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL);
-
-	return aneg_done;
-}
-
 static int at803x_read_status(struct phy_device *phydev)
 {
 	int ss, err, old_link = phydev->link;
@@ -1198,7 +1168,6 @@ static struct phy_driver at803x_driver[] = {
 	.resume			= at803x_resume,
 	/* PHY_GBIT_FEATURES */
 	.read_status		= at803x_read_status,
-	.aneg_done		= at803x_aneg_done,
 	.config_intr		= &at803x_config_intr,
 	.handle_interrupt	= at803x_handle_interrupt,
 	.get_tunable		= at803x_get_tunable,
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
new file mode 100644
index 000000000000..eca8c2f20684
--- /dev/null
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Marvell 88x2222 dual-port multi-speed ethernet transceiver.
+ *
+ * Supports:
+ *	XAUI on the host side.
+ *	1000Base-X or 10GBase-R on the line side.
+ *	SGMII over 1000Base-X.
+ */
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mdio.h>
+#include <linux/marvell_phy.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/sfp.h>
+#include <linux/netdevice.h>
+
+/* Port PCS Configuration */
+#define	MV_PCS_CONFIG		0xF002
+#define	MV_PCS_HOST_XAUI	0x73
+#define	MV_PCS_LINE_10GBR	(0x71 << 8)
+#define	MV_PCS_LINE_1GBX_AN	(0x7B << 8)
+#define	MV_PCS_LINE_SGMII_AN	(0x7F << 8)
+
+/* Port Reset and Power Down */
+#define	MV_PORT_RST	0xF003
+#define	MV_LINE_RST_SW	BIT(15)
+#define	MV_HOST_RST_SW	BIT(7)
+#define	MV_PORT_RST_SW	(MV_LINE_RST_SW | MV_HOST_RST_SW)
+
+/* 1000Base-X/SGMII Control Register */
+#define	MV_1GBX_CTRL		(0x2000 + MII_BMCR)
+
+/* 1000BASE-X/SGMII Status Register */
+#define	MV_1GBX_STAT		(0x2000 + MII_BMSR)
+
+/* 1000Base-X Auto-Negotiation Advertisement Register */
+#define	MV_1GBX_ADVERTISE	(0x2000 + MII_ADVERTISE)
+
+/* 1000Base-X PHY Specific Status Register */
+#define	MV_1GBX_PHY_STAT		0xA003
+#define	MV_1GBX_PHY_STAT_AN_RESOLVED	BIT(11)
+#define	MV_1GBX_PHY_STAT_DUPLEX		BIT(13)
+#define	MV_1GBX_PHY_STAT_SPEED100	BIT(14)
+#define	MV_1GBX_PHY_STAT_SPEED1000	BIT(15)
+
+struct mv2222_data {
+	phy_interface_t line_interface;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+};
+
+/* SFI PMA transmit enable */
+static int mv2222_tx_enable(struct phy_device *phydev)
+{
+	return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+				  MDIO_PMD_TXDIS_GLOBAL);
+}
+
+/* SFI PMA transmit disable */
+static int mv2222_tx_disable(struct phy_device *phydev)
+{
+	return phy_set_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+				MDIO_PMD_TXDIS_GLOBAL);
+}
+
+static int mv2222_soft_reset(struct phy_device *phydev)
+{
+	int val, ret;
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PORT_RST,
+			    MV_PORT_RST_SW);
+	if (ret < 0)
+		return ret;
+
+	return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND2, MV_PORT_RST,
+					 val, !(val & MV_PORT_RST_SW),
+					 5000, 1000000, true);
+}
+
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_10g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if (val < 0)
+		return val;
+
+	if (val & MDIO_STAT1_LSTATUS) {
+		link = 1;
+
+		/* 10GBASE-R do not support auto-negotiation */
+		phydev->autoneg = AUTONEG_DISABLE;
+		phydev->speed = SPEED_10000;
+		phydev->duplex = DUPLEX_FULL;
+	}
+
+	return link;
+}
+
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_1g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
+	if (val < 0)
+		return val;
+
+	if (!(val & BMSR_LSTATUS) ||
+	    (phydev->autoneg == AUTONEG_ENABLE &&
+	     !(val & BMSR_ANEGCOMPLETE)))
+		return 0;
+
+	link = 1;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_PHY_STAT);
+	if (val < 0)
+		return val;
+
+	if (val & MV_1GBX_PHY_STAT_AN_RESOLVED) {
+		if (val & MV_1GBX_PHY_STAT_DUPLEX)
+			phydev->duplex = DUPLEX_FULL;
+		else
+			phydev->duplex = DUPLEX_HALF;
+
+		if (val & MV_1GBX_PHY_STAT_SPEED1000)
+			phydev->speed = SPEED_1000;
+		else if (val & MV_1GBX_PHY_STAT_SPEED100)
+			phydev->speed = SPEED_100;
+		else
+			phydev->speed = SPEED_10;
+	}
+
+	return link;
+}
+
+static int mv2222_read_status(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int link;
+
+	phydev->link = 0;
+	phydev->speed = SPEED_UNKNOWN;
+	phydev->duplex = DUPLEX_UNKNOWN;
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
+		link = mv2222_read_status_10g(phydev);
+	else
+		link = mv2222_read_status_1g(phydev);
+
+	if (link < 0)
+		return link;
+
+	phydev->link = link;
+
+	return 0;
+}
+
+static int mv2222_disable_aneg(struct phy_device *phydev)
+{
+	int ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_CTRL,
+				     BMCR_ANENABLE | BMCR_ANRESTART);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_soft_reset(phydev);
+}
+
+static int mv2222_enable_aneg(struct phy_device *phydev)
+{
+	int ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_CTRL,
+				   BMCR_ANENABLE | BMCR_RESET);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_soft_reset(phydev);
+}
+
+static int mv2222_set_sgmii_speed(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	switch (phydev->speed) {
+	default:
+	case SPEED_1000:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED1000);
+
+		fallthrough;
+	case SPEED_100:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED100);
+		fallthrough;
+	case SPEED_10:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED10);
+
+		return -EINVAL;
+	}
+}
+
+static bool mv2222_is_10g_capable(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	return (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+				  priv->supported));
+}
+
+static bool mv2222_is_1gbx_capable(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	return linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+				 priv->supported);
+}
+
+static int mv2222_config_line(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	switch (priv->line_interface) {
+	case PHY_INTERFACE_MODE_10GBASER:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_10GBR);
+	case PHY_INTERFACE_MODE_1000BASEX:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_1GBX_AN);
+	case PHY_INTERFACE_MODE_SGMII:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_SGMII_AN);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int mv2222_setup_forced(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	bool changed = false;
+	int ret;
+
+	switch (priv->line_interface) {
+	case PHY_INTERFACE_MODE_10GBASER:
+		if (phydev->speed == SPEED_1000 &&
+		    mv2222_is_1gbx_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
+			changed = true;
+		}
+
+		break;
+	case PHY_INTERFACE_MODE_1000BASEX:
+		if (phydev->speed == SPEED_10000 &&
+		    mv2222_is_10g_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_10GBASER;
+			changed = true;
+		}
+
+		break;
+	case PHY_INTERFACE_MODE_SGMII:
+		ret = mv2222_set_sgmii_speed(phydev);
+		if (ret < 0)
+			return ret;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (changed) {
+		ret = mv2222_config_line(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	return mv2222_disable_aneg(phydev);
+}
+
+static int mv2222_config_aneg(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int ret, adv;
+
+	/* SFP is not present, do nothing */
+	if (priv->line_interface == PHY_INTERFACE_MODE_NA)
+		return 0;
+
+	if (phydev->autoneg == AUTONEG_DISABLE ||
+	    phydev->speed == SPEED_10000)
+		return mv2222_setup_forced(phydev);
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER &&
+	    mv2222_is_1gbx_capable(phydev)) {
+		priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
+		ret = mv2222_config_line(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	adv = linkmode_adv_to_mii_adv_x(priv->supported,
+					ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+
+	ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_ADVERTISE,
+			     ADVERTISE_1000XFULL |
+			     ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM,
+			     adv);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_enable_aneg(phydev);
+}
+
+static int mv2222_aneg_done(struct phy_device *phydev)
+{
+	int ret;
+
+	if (mv2222_is_10g_capable(phydev)) {
+		ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+		if (ret < 0)
+			return ret;
+
+		if (ret & MDIO_STAT1_LSTATUS)
+			return 1;
+	}
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
+	if (ret < 0)
+		return ret;
+
+	return (ret & BMSR_ANEGCOMPLETE);
+}
+
+static int mv2222_resume(struct phy_device *phydev)
+{
+	return mv2222_tx_enable(phydev);
+}
+
+static int mv2222_suspend(struct phy_device *phydev)
+{
+	return mv2222_tx_disable(phydev);
+}
+
+static int mv2222_get_features(struct phy_device *phydev)
+{
+	/* All supported linkmodes are set at probe */
+
+	return 0;
+}
+
+static int mv2222_config_init(struct phy_device *phydev)
+{
+	if (phydev->interface != PHY_INTERFACE_MODE_XAUI)
+		return -EINVAL;
+
+	phydev->autoneg = AUTONEG_DISABLE;
+
+	return 0;
+}
+
+static int mv2222_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+	struct phy_device *phydev = upstream;
+	phy_interface_t sfp_interface;
+	struct mv2222_data *priv;
+	struct device *dev;
+	int ret;
+
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_supported) = { 0, };
+
+	priv = (struct mv2222_data *)phydev->priv;
+	dev = &phydev->mdio.dev;
+
+	sfp_parse_support(phydev->sfp_bus, id, sfp_supported);
+	sfp_interface = sfp_select_interface(phydev->sfp_bus, sfp_supported);
+
+	dev_info(dev, "%s SFP module inserted\n", phy_modes(sfp_interface));
+
+	if (sfp_interface != PHY_INTERFACE_MODE_10GBASER &&
+	    sfp_interface != PHY_INTERFACE_MODE_1000BASEX &&
+	    sfp_interface != PHY_INTERFACE_MODE_SGMII) {
+		dev_err(dev, "Incompatible SFP module inserted\n");
+
+		return -EINVAL;
+	}
+
+	priv->line_interface = sfp_interface;
+	linkmode_and(priv->supported, phydev->supported, sfp_supported);
+
+	ret = mv2222_config_line(phydev);
+	if (ret < 0)
+		return ret;
+
+	if (mutex_trylock(&phydev->lock)) {
+		if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
+			ret = mv2222_setup_forced(phydev);
+		else
+			ret = mv2222_config_aneg(phydev);
+
+		mutex_unlock(&phydev->lock);
+	}
+
+	return ret;
+}
+
+static void mv2222_sfp_remove(void *upstream)
+{
+	struct phy_device *phydev = upstream;
+	struct mv2222_data *priv;
+
+	priv = (struct mv2222_data *)phydev->priv;
+
+	priv->line_interface = PHY_INTERFACE_MODE_NA;
+	linkmode_zero(priv->supported);
+}
+
+static const struct sfp_upstream_ops sfp_phy_ops = {
+	.module_insert = mv2222_sfp_insert,
+	.module_remove = mv2222_sfp_remove,
+	.attach = phy_sfp_attach,
+	.detach = phy_sfp_detach,
+};
+
+static int mv2222_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct mv2222_data *priv = NULL;
+
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
+
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported);
+
+	linkmode_copy(phydev->supported, supported);
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->line_interface = PHY_INTERFACE_MODE_NA;
+	phydev->priv = priv;
+
+	return phy_sfp_probe(phydev, &sfp_phy_ops);
+}
+
+static struct phy_driver mv2222_drivers[] = {
+	{
+		.phy_id = MARVELL_PHY_ID_88X2222,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88X2222",
+		.get_features = mv2222_get_features,
+		.soft_reset = mv2222_soft_reset,
+		.config_init = mv2222_config_init,
+		.config_aneg = mv2222_config_aneg,
+		.aneg_done = mv2222_aneg_done,
+		.probe = mv2222_probe,
+		.suspend = mv2222_suspend,
+		.resume = mv2222_resume,
+		.read_status = mv2222_read_status,
+	},
+};
+module_phy_driver(mv2222_drivers);
+
+static struct mdio_device_id __maybe_unused mv2222_tbl[] = {
+	{ MARVELL_PHY_ID_88X2222, MARVELL_PHY_ID_MASK },
+	{ }
+};
+MODULE_DEVICE_TABLE(mdio, mv2222_tbl);
+
+MODULE_DESCRIPTION("Marvell 88x2222 ethernet transceiver driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index b1bb9b8e1e4e..74b64e52ffa2 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -781,6 +781,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.get_tunable	= mv3310_get_tunable,
 		.set_tunable	= mv3310_set_tunable,
 		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
 	},
 	{
 		.phy_id		= MARVELL_PHY_ID_88E2110,
@@ -796,6 +797,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.get_tunable	= mv3310_get_tunable,
 		.set_tunable	= mv3310_set_tunable,
 		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
 	},
 };
 
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 3a7705228ed5..6e32da28e138 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1362,6 +1362,12 @@ static int vsc8584_config_pre_init(struct phy_device *phydev)
 	u16 crc, reg;
 	int ret;
 
+	ret = vsc8584_pll5g_reset(phydev);
+	if (ret < 0) {
+		dev_err(dev, "failed LCPLL reset, ret: %d\n", ret);
+		return ret;
+	}
+
 	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
 
 	/* all writes below are broadcasted to all PHYs in the same package */
@@ -1466,6 +1472,24 @@ static int vsc8584_config_pre_init(struct phy_device *phydev)
 	if (ret)
 		goto out;
 
+	/* Write patch vector 0, to skip IB cal polling  */
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED_GPIO);
+	reg = MSCC_ROM_TRAP_SERDES_6G_CFG; /* ROM address to trap, for patch vector 0 */
+	ret = phy_base_write(phydev, MSCC_TRAP_ROM_ADDR(1), reg);
+	if (ret)
+		goto out;
+
+	reg = MSCC_RAM_TRAP_SERDES_6G_CFG; /* RAM address to jump to, when patch vector 0 enabled */
+	ret = phy_base_write(phydev, MSCC_PATCH_RAM_ADDR(1), reg);
+	if (ret)
+		goto out;
+
+	reg = phy_base_read(phydev, MSCC_INT_MEM_CNTL);
+	reg |= PATCH_VEC_ZERO_EN; /* bit 8, enable patch vector 0 */
+	ret = phy_base_write(phydev, MSCC_INT_MEM_CNTL, reg);
+	if (ret)
+		goto out;
+
 	vsc8584_micro_deassert_reset(phydev, true);
 
 out:
@@ -1531,62 +1555,81 @@ static void vsc85xx_coma_mode_release(struct phy_device *phydev)
 	vsc85xx_phy_write_page(phydev, MSCC_PHY_PAGE_STANDARD);
 }
 
-static int vsc8584_config_init(struct phy_device *phydev)
+static int vsc8584_config_host_serdes(struct phy_device *phydev)
 {
 	struct vsc8531_private *vsc8531 = phydev->priv;
-	int ret, i;
+	int ret;
 	u16 val;
 
-	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+			     MSCC_PHY_PAGE_EXTENDED_GPIO);
+	if (ret)
+		return ret;
 
-	phy_lock_mdio_bus(phydev);
+	val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK);
+	val &= ~MAC_CFG_MASK;
+	if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) {
+		val |= MAC_CFG_QSGMII;
+	} else if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+		val |= MAC_CFG_SGMII;
+	} else {
+		ret = -EINVAL;
+		return ret;
+	}
 
-	/* Some parts of the init sequence are identical for every PHY in the
-	 * package. Some parts are modifying the GPIO register bank which is a
-	 * set of registers that are affecting all PHYs, a few resetting the
-	 * microprocessor common to all PHYs. The CRC check responsible of the
-	 * checking the firmware within the 8051 microprocessor can only be
-	 * accessed via the PHY whose internal address in the package is 0.
-	 * All PHYs' interrupts mask register has to be zeroed before enabling
-	 * any PHY's interrupt in this register.
-	 * For all these reasons, we need to do the init sequence once and only
-	 * once whatever is the first PHY in the package that is initialized and
-	 * do the correct init sequence for all PHYs that are package-critical
-	 * in this pre-init function.
-	 */
-	if (phy_package_init_once(phydev)) {
-		/* The following switch statement assumes that the lowest
-		 * nibble of the phy_id_mask is always 0. This works because
-		 * the lowest nibble of the PHY_ID's below are also 0.
-		 */
-		WARN_ON(phydev->drv->phy_id_mask & 0xf);
+	ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val);
+	if (ret)
+		return ret;
 
-		switch (phydev->phy_id & phydev->drv->phy_id_mask) {
-		case PHY_ID_VSC8504:
-		case PHY_ID_VSC8552:
-		case PHY_ID_VSC8572:
-		case PHY_ID_VSC8574:
-			ret = vsc8574_config_pre_init(phydev);
-			break;
-		case PHY_ID_VSC856X:
-		case PHY_ID_VSC8575:
-		case PHY_ID_VSC8582:
-		case PHY_ID_VSC8584:
-			ret = vsc8584_config_pre_init(phydev);
-			break;
-		default:
-			ret = -EINVAL;
-			break;
-		}
+	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+			     MSCC_PHY_PAGE_STANDARD);
+	if (ret)
+		return ret;
 
-		if (ret)
-			goto err;
-	}
+	val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT |
+		PROC_CMD_READ_MOD_WRITE_PORT;
+	if (phydev->interface == PHY_INTERFACE_MODE_QSGMII)
+		val |= PROC_CMD_QSGMII_MAC;
+	else
+		val |= PROC_CMD_SGMII_MAC;
+
+	ret = vsc8584_cmd(phydev, val);
+	if (ret)
+		return ret;
+
+	usleep_range(10000, 20000);
+
+	/* Disable SerDes for 100Base-FX */
+	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			  PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_READ_MOD_WRITE_PORT |
+			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_100BASE_FX);
+	if (ret)
+		return ret;
+
+	/* Disable SerDes for 1000Base-X */
+	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			  PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_READ_MOD_WRITE_PORT |
+			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
+	if (ret)
+		return ret;
+
+	return vsc85xx_sd6g_config_v2(phydev);
+}
+
+static int vsc8574_config_host_serdes(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	int ret;
+	u16 val;
 
 	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 			     MSCC_PHY_PAGE_EXTENDED_GPIO);
 	if (ret)
-		goto err;
+		return ret;
 
 	val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK);
 	val &= ~MAC_CFG_MASK;
@@ -1598,17 +1641,17 @@ static int vsc8584_config_init(struct phy_device *phydev)
 		val |= MAC_CFG_RGMII;
 	} else {
 		ret = -EINVAL;
-		goto err;
+		return ret;
 	}
 
 	ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 			     MSCC_PHY_PAGE_STANDARD);
 	if (ret)
-		goto err;
+		return ret;
 
 	if (!phy_interface_is_rgmii(phydev)) {
 		val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT |
@@ -1620,7 +1663,7 @@ static int vsc8584_config_init(struct phy_device *phydev)
 
 		ret = vsc8584_cmd(phydev, val);
 		if (ret)
-			goto err;
+			return ret;
 
 		usleep_range(10000, 20000);
 	}
@@ -1632,16 +1675,78 @@ static int vsc8584_config_init(struct phy_device *phydev)
 			  PROC_CMD_READ_MOD_WRITE_PORT |
 			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_100BASE_FX);
 	if (ret)
-		goto err;
+		return ret;
 
 	/* Disable SerDes for 1000Base-X */
-	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
-			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
-			  PROC_CMD_FIBER_DISABLE |
-			  PROC_CMD_READ_MOD_WRITE_PORT |
-			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
-	if (ret)
-		goto err;
+	return vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			   PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			   PROC_CMD_FIBER_DISABLE |
+			   PROC_CMD_READ_MOD_WRITE_PORT |
+			   PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
+}
+
+static int vsc8584_config_init(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	int ret, i;
+	u16 val;
+
+	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+	phy_lock_mdio_bus(phydev);
+
+	/* Some parts of the init sequence are identical for every PHY in the
+	 * package. Some parts are modifying the GPIO register bank which is a
+	 * set of registers that are affecting all PHYs, a few resetting the
+	 * microprocessor common to all PHYs. The CRC check responsible of the
+	 * checking the firmware within the 8051 microprocessor can only be
+	 * accessed via the PHY whose internal address in the package is 0.
+	 * All PHYs' interrupts mask register has to be zeroed before enabling
+	 * any PHY's interrupt in this register.
+	 * For all these reasons, we need to do the init sequence once and only
+	 * once whatever is the first PHY in the package that is initialized and
+	 * do the correct init sequence for all PHYs that are package-critical
+	 * in this pre-init function.
+	 */
+	if (phy_package_init_once(phydev)) {
+		/* The following switch statement assumes that the lowest
+		 * nibble of the phy_id_mask is always 0. This works because
+		 * the lowest nibble of the PHY_ID's below are also 0.
+		 */
+		WARN_ON(phydev->drv->phy_id_mask & 0xf);
+
+		switch (phydev->phy_id & phydev->drv->phy_id_mask) {
+		case PHY_ID_VSC8504:
+		case PHY_ID_VSC8552:
+		case PHY_ID_VSC8572:
+		case PHY_ID_VSC8574:
+			ret = vsc8574_config_pre_init(phydev);
+			if (ret)
+				goto err;
+			ret = vsc8574_config_host_serdes(phydev);
+			if (ret)
+				goto err;
+			break;
+		case PHY_ID_VSC856X:
+		case PHY_ID_VSC8575:
+		case PHY_ID_VSC8582:
+		case PHY_ID_VSC8584:
+			ret = vsc8584_config_pre_init(phydev);
+			if (ret)
+				goto err;
+			ret = vsc8584_config_host_serdes(phydev);
+			if (ret)
+				goto err;
+			vsc85xx_coma_mode_release(phydev);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+
+		if (ret)
+			goto err;
+	}
 
 	phy_unlock_mdio_bus(phydev);
 
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 077f2929c45e..91e3acb9e397 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -560,6 +560,14 @@ int gen10g_config_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
+int genphy_c45_loopback(struct phy_device *phydev, bool enable)
+{
+	return phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+			      MDIO_PCS_CTRL1_LOOPBACK,
+			      enable ? MDIO_PCS_CTRL1_LOOPBACK : 0);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_loopback);
+
 struct phy_driver genphy_c45_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index cc38e326405a..a009d1769b08 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -512,10 +512,21 @@ phy_has_fixups_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(phy_has_fixups);
 
+static ssize_t phy_dev_flags_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+
+	return sprintf(buf, "0x%08x\n", phydev->dev_flags);
+}
+static DEVICE_ATTR_RO(phy_dev_flags);
+
 static struct attribute *phy_dev_attrs[] = {
 	&dev_attr_phy_id.attr,
 	&dev_attr_phy_interface.attr,
 	&dev_attr_phy_has_fixups.attr,
+	&dev_attr_phy_dev_flags.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(phy_dev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index dc2800beacc3..96d8e88b4e46 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -271,8 +271,9 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 		pl->cfg_link_an_mode = MLO_AN_FIXED;
 	fwnode_handle_put(dn);
 
-	if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
-	    strcmp(managed, "in-band-status") == 0) {
+	if ((fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
+	     strcmp(managed, "in-band-status") == 0) ||
+	    pl->config->ovr_an_inband) {
 		if (pl->cfg_link_an_mode == MLO_AN_FIXED) {
 			phylink_err(pl,
 				    "can't use both fixed-link and in-band-status\n");
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index 4406b353123e..e26cf91bdec2 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -516,6 +516,7 @@ plip_receive(unsigned short nibble_timeout, struct net_device *dev,
 		*data_p |= (c0 << 1) & 0xf0;
 		write_data (dev, 0x00); /* send ACK */
 		*ns_p = PLIP_NB_BEGIN;
+		break;
 	case PLIP_NB_2:
 		break;
 	}
@@ -808,6 +809,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl,
 				return HS_TIMEOUT;
 			}
 		}
+		break;
 
 	case PLIP_PK_LENGTH_LSB:
 		if (plip_send(nibble_timeout, dev,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index d445ecb1d0c7..930e49ef15f6 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1560,12 +1560,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
 		ppp_destroy_interface(ppp);
 }
 
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
+				 struct net_device_path *path)
+{
+	struct ppp *ppp = netdev_priv(ctx->dev);
+	struct ppp_channel *chan;
+	struct channel *pch;
+
+	if (ppp->flags & SC_MULTILINK)
+		return -EOPNOTSUPP;
+
+	if (list_empty(&ppp->channels))
+		return -ENODEV;
+
+	pch = list_first_entry(&ppp->channels, struct channel, clist);
+	chan = pch->chan;
+	if (!chan->ops->fill_forward_path)
+		return -EOPNOTSUPP;
+
+	return chan->ops->fill_forward_path(ctx, path, chan);
+}
+
 static const struct net_device_ops ppp_netdev_ops = {
 	.ndo_init	 = ppp_dev_init,
 	.ndo_uninit      = ppp_dev_uninit,
 	.ndo_start_xmit  = ppp_start_xmit,
 	.ndo_do_ioctl    = ppp_net_ioctl,
 	.ndo_get_stats64 = ppp_get_stats64,
+	.ndo_fill_forward_path = ppp_fill_forward_path,
 };
 
 static struct device_type ppp_type = {
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d7f50b835050..3619520340b7 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -25,7 +25,7 @@
  *		in pppoe_release.
  * 051000 :	Initialization cleanup.
  * 111100 :	Fix recvmsg.
- * 050101 :	Fix PADT procesing.
+ * 050101 :	Fix PADT processing.
  * 140501 :	Use pppoe_rcv_core to handle all backlog. (Alexey)
  * 170701 :	Do not lock_sock with rwlock held. (DaveM)
  *		Ignore discovery frames if user has socket
@@ -96,7 +96,7 @@ struct pppoe_net {
 	 * we could use _single_ hash table for all
 	 * nets by injecting net id into the hash but
 	 * it would increase hash chains and add
-	 * a few additional math comparations messy
+	 * a few additional math comparisons messy
 	 * as well, moreover in case of SMP less locking
 	 * controversy here
 	 */
@@ -972,8 +972,31 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	return __pppoe_xmit(sk, skb);
 }
 
+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
+				   struct net_device_path *path,
+				   const struct ppp_channel *chan)
+{
+	struct sock *sk = (struct sock *)chan->private;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct net_device *dev = po->pppoe_dev;
+
+	if (sock_flag(sk, SOCK_DEAD) ||
+	    !(sk->sk_state & PPPOX_CONNECTED) || !dev)
+		return -1;
+
+	path->type = DEV_PATH_PPPOE;
+	path->encap.proto = htons(ETH_P_PPP_SES);
+	path->encap.id = be16_to_cpu(po->num);
+	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+	path->dev = ctx->dev;
+	ctx->dev = dev;
+
+	return 0;
+}
+
 static const struct ppp_channel_ops pppoe_chan_ops = {
 	.start_xmit = pppoe_xmit,
+	.fill_forward_path = pppoe_fill_forward_path,
 };
 
 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 8acf30115428..8ae565a801b5 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -920,7 +920,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 		goto error2;
 	}
 
-	usb_set_intfdata(ctx->data, dev);
 	usb_set_intfdata(ctx->control, dev);
 
 	if (ctx->ether_desc) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 82e520d2cb12..584a9bd59dda 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -195,6 +195,9 @@ struct virtnet_info {
 	/* # of XDP queue pairs currently used by the driver */
 	u16 xdp_queue_pairs;
 
+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
+	bool xdp_enabled;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -481,12 +484,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	return 0;
 }
 
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
-	unsigned int qp;
-
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	return &vi->sq[qp];
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use marco instead of inline functions because we have to deal with
+ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
+ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
+ * functions to perfectly solve these three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({                                       \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+	unsigned int qp;                                                \
+									\
+	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
+		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
+		qp += smp_processor_id();                               \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_acquire(txq);                                \
+	} else {                                                        \
+		qp = smp_processor_id() % v->curr_queue_pairs;          \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_lock(txq, raw_smp_processor_id());           \
+	}                                                               \
+	v->sq + qp;                                                     \
+})
+
+#define virtnet_xdp_put_sq(vi, q) {                                     \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+									\
+	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
+	if (v->curr_queue_pairs > nr_cpu_ids)                           \
+		__netif_tx_release(txq);                                \
+	else                                                            \
+		__netif_tx_unlock(txq);                                 \
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -512,7 +544,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	if (!xdp_prog)
 		return -ENXIO;
 
-	sq = virtnet_xdp_sq(vi);
+	sq = virtnet_xdp_get_sq(vi);
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
@@ -560,12 +592,13 @@ out:
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);
 
+	virtnet_xdp_put_sq(vi, sq);
 	return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
-	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
 }
 
 /* We copy the packet for XDP in the following cases:
@@ -1458,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 		xdp_do_flush();
 
 	if (xdp_xmit & VIRTIO_XDP_TX) {
-		sq = virtnet_xdp_sq(vi);
+		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			sq->stats.kicks++;
 			u64_stats_update_end(&sq->stats.syncp);
 		}
+		virtnet_xdp_put_sq(vi, sq);
 	}
 
 	return received;
@@ -1981,7 +2015,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 		}
 		virtqueue_set_affinity(vi->rq[i].vq, mask);
 		virtqueue_set_affinity(vi->sq[i].vq, mask);
-		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
 		cpumask_clear(mask);
 	}
 
@@ -2104,25 +2138,21 @@ static int virtnet_set_channels(struct net_device *dev,
 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	char *p = (char *)data;
 	unsigned int i, j;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
-					 i, virtnet_rq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
+						virtnet_rq_stats_desc[j].desc);
 		}
 
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
-					 i, virtnet_sq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
+						virtnet_sq_stats_desc[j].desc);
 		}
 		break;
 	}
@@ -2418,10 +2448,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
 	/* XDP requires extra queues for XDP_TX */
 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
-		netdev_warn(dev, "request %i queues but max is %i\n",
+		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
 			    curr_qp + xdp_qp, vi->max_queue_pairs);
-		return -ENOMEM;
+		xdp_qp = 0;
 	}
 
 	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@ -2455,11 +2484,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	vi->xdp_queue_pairs = xdp_qp;
 
 	if (prog) {
+		vi->xdp_enabled = true;
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
 		}
+	} else {
+		vi->xdp_enabled = false;
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2527,7 +2559,7 @@ static int virtnet_set_features(struct net_device *dev,
 	int err;
 
 	if ((dev->features ^ features) & NETIF_F_LRO) {
-		if (vi->xdp_queue_pairs)
+		if (vi->xdp_enabled)
 			return -EBUSY;
 
 		if (features & NETIF_F_LRO)
@@ -2973,7 +3005,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		return -ENOMEM;
 
 	/* Set up network device as normal. */
-	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
+			   IFF_TX_SKB_NO_LINEAR;
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 7ec8652f2c26..c0bd9cbc43b1 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -218,43 +218,28 @@ vmxnet3_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 static void
 vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
 {
-	 struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	if (stringset == ETH_SS_STATS) {
-		int i, j;
-		for (j = 0; j < adapter->num_tx_queues; j++) {
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++) {
-				memcpy(buf, vmxnet3_tq_dev_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats);
-			     i++) {
-				memcpy(buf, vmxnet3_tq_driver_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-		}
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	int i, j;
 
-		for (j = 0; j < adapter->num_rx_queues; j++) {
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++) {
-				memcpy(buf, vmxnet3_rq_dev_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats);
-			     i++) {
-				memcpy(buf, vmxnet3_rq_driver_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-		}
+	if (stringset != ETH_SS_STATS)
+		return;
 
-		for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++) {
-			memcpy(buf, vmxnet3_global_stats[i].desc,
-				ETH_GSTRING_LEN);
-			buf += ETH_GSTRING_LEN;
-		}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_tq_dev_stats[i].desc);
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_tq_driver_stats[i].desc);
+	}
+
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_rq_dev_stats[i].desc);
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_rq_driver_stats[i].desc);
 	}
+
+	for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++)
+		ethtool_sprintf(&buf, vmxnet3_global_stats[i].desc);
 }
 
 netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 666dd201c3d5..7665817f3cb6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3703,6 +3703,7 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
 #if IS_ENABLED(CONFIG_IPV6)
 		if (use_ipv6) {
 			struct inet6_dev *idev = __in6_dev_get(lowerdev);
+
 			if (idev && idev->cnf.disable_ipv6) {
 				NL_SET_ERR_MSG(extack,
 					       "IPv6 support disabled by administrator");
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index c3372498f4f1..45d74285265a 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -51,6 +51,8 @@ struct lapbethdev {
 	struct list_head	node;
 	struct net_device	*ethdev;	/* link to ethernet device */
 	struct net_device	*axdev;		/* lapbeth device (lapb#) */
+	bool			up;
+	spinlock_t		up_lock;	/* Protects "up" */
 };
 
 static LIST_HEAD(lapbeth_devices);
@@ -101,8 +103,9 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	rcu_read_lock();
 	lapbeth = lapbeth_get_x25_dev(dev);
 	if (!lapbeth)
-		goto drop_unlock;
-	if (!netif_running(lapbeth->axdev))
+		goto drop_unlock_rcu;
+	spin_lock_bh(&lapbeth->up_lock);
+	if (!lapbeth->up)
 		goto drop_unlock;
 
 	len = skb->data[0] + skb->data[1] * 256;
@@ -117,11 +120,14 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		goto drop_unlock;
 	}
 out:
+	spin_unlock_bh(&lapbeth->up_lock);
 	rcu_read_unlock();
 	return 0;
 drop_unlock:
 	kfree_skb(skb);
 	goto out;
+drop_unlock_rcu:
+	rcu_read_unlock();
 drop:
 	kfree_skb(skb);
 	return 0;
@@ -151,13 +157,11 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
-	/*
-	 * Just to be *really* sure not to send anything if the interface
-	 * is down, the ethernet device may have gone.
-	 */
-	if (!netif_running(dev))
+	spin_lock_bh(&lapbeth->up_lock);
+	if (!lapbeth->up)
 		goto drop;
 
 	/* There should be a pseudo header of 1 byte added by upper layers.
@@ -194,6 +198,7 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 		goto drop;
 	}
 out:
+	spin_unlock_bh(&lapbeth->up_lock);
 	return NETDEV_TX_OK;
 drop:
 	kfree_skb(skb);
@@ -285,6 +290,7 @@ static const struct lapb_register_struct lapbeth_callbacks = {
  */
 static int lapbeth_open(struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
 	if ((err = lapb_register(dev, &lapbeth_callbacks)) != LAPB_OK) {
@@ -292,13 +298,22 @@ static int lapbeth_open(struct net_device *dev)
 		return -ENODEV;
 	}
 
+	spin_lock_bh(&lapbeth->up_lock);
+	lapbeth->up = true;
+	spin_unlock_bh(&lapbeth->up_lock);
+
 	return 0;
 }
 
 static int lapbeth_close(struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
+	spin_lock_bh(&lapbeth->up_lock);
+	lapbeth->up = false;
+	spin_unlock_bh(&lapbeth->up_lock);
+
 	if ((err = lapb_unregister(dev)) != LAPB_OK)
 		pr_err("lapb_unregister error: %d\n", err);
 
@@ -356,6 +371,9 @@ static int lapbeth_new_device(struct net_device *dev)
 	dev_hold(dev);
 	lapbeth->ethdev = dev;
 
+	lapbeth->up = false;
+	spin_lock_init(&lapbeth->up_lock);
+
 	rc = -EIO;
 	if (register_netdevice(ndev))
 		goto fail;
@@ -403,8 +421,8 @@ static int lapbeth_device_event(struct notifier_block *this,
 		if (lapbeth_get_x25_dev(dev) == NULL)
 			lapbeth_new_device(dev);
 		break;
-	case NETDEV_DOWN:	
-		/* ethernet device closed -> close LAPB interface */
+	case NETDEV_GOING_DOWN:
+		/* ethernet device closes -> close LAPB interface */
 		lapbeth = lapbeth_get_x25_dev(dev);
 		if (lapbeth) 
 			dev_close(lapbeth->axdev);
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 4dc7bd7e02b6..ee2baa2b2fcf 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -176,7 +176,7 @@ static void fdp_nci_set_data_pkt_counter(struct nci_dev *ndev,
  *
  * The firmware will be analyzed and applied when we send NCI_OP_PROP_PATCH_CMD
  * command with NCI_PATCH_TYPE_EOT parameter. The device will send a
- * NFCC_PATCH_NTF packaet and a NCI_OP_CORE_RESET_NTF packet.
+ * NFCC_PATCH_NTF packet and a NCI_OP_CORE_RESET_NTF packet.
  */
 static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 {
@@ -236,15 +236,12 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 
 static int fdp_nci_open(struct nci_dev *ndev)
 {
-	int r;
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
 	struct device *dev = &info->phy->i2c_dev->dev;
 
 	dev_dbg(dev, "%s\n", __func__);
 
-	r = info->phy_ops->enable(info->phy);
-
-	return r;
+	return info->phy_ops->enable(info->phy);
 }
 
 static int fdp_nci_close(struct nci_dev *ndev)
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index 6e411821583e..bc0a27de69d4 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -79,6 +79,9 @@ static const void *of_get_mac_addr_nvmem(struct device_node *np)
 }
 
 /**
+ * of_get_mac_address()
+ * @np:		Caller's Device Node
+ *
  * Search the device tree for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
  * address. If that isn't set, then 'local-mac-address' is checked next,
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index 75463c2e2b86..fa636951169e 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -1404,8 +1404,8 @@ static int idtcm_set_pll_mode(struct idtcm_channel *channel,
 
 /* PTP Hardware Clock interface */
 
-/**
- * @brief Maximum absolute value for write phase offset in picoseconds
+/*
+ * Maximum absolute value for write phase offset in picoseconds
  *
  * Destination signed register is 32-bit register in resolution of 50ps
  *
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index ce10ecd41ba0..a17e8cc642c5 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/ptp_pch.h>
 #include <linux/slab.h>
 
 #define STATION_ADDR_LEN	20
@@ -36,7 +37,8 @@ enum pch_status {
 	PCH_FAILED,
 	PCH_UNSUPPORTED,
 };
-/**
+
+/*
  * struct pch_ts_regs - IEEE 1588 registers
  */
 struct pch_ts_regs {
@@ -102,7 +104,8 @@ struct pch_ts_regs {
 
 #define PCH_IEEE1588_ETH	(1 << 0)
 #define PCH_IEEE1588_CAN	(1 << 1)
-/**
+
+/*
  * struct pch_dev - Driver private data
  */
 struct pch_dev {
@@ -119,7 +122,7 @@ struct pch_dev {
 	spinlock_t register_lock;
 };
 
-/**
+/*
  * struct pch_params - 1588 module parameter
  */
 struct pch_params {
@@ -179,17 +182,6 @@ static inline void pch_block_reset(struct pch_dev *chip)
 	iowrite32(val, (&chip->regs->control));
 }
 
-u32 pch_ch_control_read(struct pci_dev *pdev)
-{
-	struct pch_dev *chip = pci_get_drvdata(pdev);
-	u32 val;
-
-	val = ioread32(&chip->regs->ch_control);
-
-	return val;
-}
-EXPORT_SYMBOL(pch_ch_control_read);
-
 void pch_ch_control_write(struct pci_dev *pdev, u32 val)
 {
 	struct pch_dev *chip = pci_get_drvdata(pdev);
@@ -296,6 +288,7 @@ static void pch_reset(struct pch_dev *chip)
  *				    IEEE 1588 hardware when looking at PTP
  *				    traffic on the  ethernet interface
  * @addr:	dress which contain the column separated address to be used.
+ * @pdev:	PCI device.
  */
 int pch_set_station_address(u8 *addr, struct pci_dev *pdev)
 {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a814698387bc..6954d4e831a3 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1453,7 +1453,7 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 
 static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 					  struct qeth_qdio_out_q *queue,
-					  bool drain)
+					  bool drain, int budget)
 {
 	struct qeth_qdio_out_buffer *buf, *tmp;
 
@@ -1465,7 +1465,7 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 			if (drain)
 				qeth_notify_skbs(queue, buf,
 						 TX_NOTIFY_GENERALERROR);
-			qeth_tx_complete_buf(buf, drain, 0);
+			qeth_tx_complete_buf(buf, drain, budget);
 
 			list_del(&buf->list_entry);
 			kmem_cache_free(qeth_qdio_outbuf_cache, buf);
@@ -1477,7 +1477,7 @@ static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
 {
 	int j;
 
-	qeth_tx_complete_pending_bufs(q->card, q, true);
+	qeth_tx_complete_pending_bufs(q->card, q, true, 0);
 
 	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 		if (!q->bufs[j])
@@ -2590,11 +2590,12 @@ static int qeth_ulp_setup(struct qeth_card *card)
 	return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL);
 }
 
-static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
+static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx,
+			      gfp_t gfp)
 {
 	struct qeth_qdio_out_buffer *newbuf;
 
-	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
+	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, gfp);
 	if (!newbuf)
 		return -ENOMEM;
 
@@ -2629,7 +2630,7 @@ static struct qeth_qdio_out_q *qeth_alloc_output_queue(void)
 		goto err_qdio_bufs;
 
 	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) {
-		if (qeth_init_qdio_out_buf(q, i))
+		if (qeth_alloc_out_buf(q, i, GFP_KERNEL))
 			goto err_out_bufs;
 	}
 
@@ -6088,7 +6089,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
 				/* Prepare the queue slot for immediate re-use: */
 				qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements);
-				if (qeth_init_qdio_out_buf(queue, bidx)) {
+				if (qeth_alloc_out_buf(queue, bidx,
+						       GFP_ATOMIC)) {
 					QETH_CARD_TEXT(card, 2, "outofbuf");
 					qeth_schedule_recovery(card);
 				}
@@ -6150,7 +6152,7 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 		unsigned int bytes = 0;
 		int completed;
 
-		qeth_tx_complete_pending_bufs(card, queue, false);
+		qeth_tx_complete_pending_bufs(card, queue, false, budget);
 
 		if (qeth_out_queue_is_empty(queue)) {
 			napi_complete(napi);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index dd441eaec66e..35b42275a06c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1123,24 +1123,6 @@ out:
 	return 0;
 }
 
-static int qeth_l3_vlan_rx_add_vid(struct net_device *dev,
-				   __be16 proto, u16 vid)
-{
-	struct qeth_card *card = dev->ml_priv;
-
-	QETH_CARD_TEXT_(card, 4, "aid:%d", vid);
-	return 0;
-}
-
-static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
-				    __be16 proto, u16 vid)
-{
-	struct qeth_card *card = dev->ml_priv;
-
-	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
-	return 0;
-}
-
 static void qeth_l3_set_promisc_mode(struct qeth_card *card)
 {
 	bool enable = card->dev->flags & IFF_PROMISC;
@@ -1861,8 +1843,6 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
 	.ndo_do_ioctl		= qeth_do_ioctl,
 	.ndo_fix_features	= qeth_fix_features,
 	.ndo_set_features	= qeth_set_features,
-	.ndo_vlan_rx_add_vid	= qeth_l3_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid   = qeth_l3_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= qeth_tx_timeout,
 };
 
@@ -1878,8 +1858,6 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_do_ioctl		= qeth_do_ioctl,
 	.ndo_fix_features	= qeth_fix_features,
 	.ndo_set_features	= qeth_set_features,
-	.ndo_vlan_rx_add_vid	= qeth_l3_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid   = qeth_l3_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= qeth_tx_timeout,
 	.ndo_neigh_setup	= qeth_l3_neigh_setup,
 };
@@ -1933,8 +1911,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 
 	card->dev->needed_headroom = headroom;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
-				NETIF_F_HW_VLAN_CTAG_RX |
-				NETIF_F_HW_VLAN_CTAG_FILTER;
+				NETIF_F_HW_VLAN_CTAG_RX;
 
 	netif_keep_dst(card->dev);
 	if (card->dev->hw_features & (NETIF_F_TSO | NETIF_F_TSO6))
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index b22f73d7bfc4..6e798229fe25 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -78,8 +78,6 @@ source "drivers/staging/clocking-wizard/Kconfig"
 
 source "drivers/staging/fbtft/Kconfig"
 
-source "drivers/staging/fsl-dpaa2/Kconfig"
-
 source "drivers/staging/most/Kconfig"
 
 source "drivers/staging/ks7010/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 2245059e69c7..8d4d9812ecdf 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_GS_FPGABOOT)	+= gs_fpgaboot/
 obj-$(CONFIG_UNISYSSPAR)	+= unisys/
 obj-$(CONFIG_COMMON_CLK_XLNX_CLKWZRD)	+= clocking-wizard/
 obj-$(CONFIG_FB_TFT)		+= fbtft/
-obj-$(CONFIG_FSL_DPAA2)		+= fsl-dpaa2/
 obj-$(CONFIG_MOST)		+= most/
 obj-$(CONFIG_KS7010)		+= ks7010/
 obj-$(CONFIG_GREYBUS)		+= greybus/
diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig
deleted file mode 100644
index 244237bb068a..000000000000
--- a/drivers/staging/fsl-dpaa2/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Freescale DataPath Acceleration Architecture Gen2 (DPAA2) drivers
-#
-
-config FSL_DPAA2
-	bool "Freescale DPAA2 devices"
-	depends on FSL_MC_BUS
-	help
-	  Build drivers for Freescale DataPath Acceleration
-	  Architecture (DPAA2) family of SoCs.
-
-config FSL_DPAA2_ETHSW
-	tristate "Freescale DPAA2 Ethernet Switch"
-	depends on FSL_DPAA2
-	depends on NET_SWITCHDEV
-	help
-	  Driver for Freescale DPAA2 Ethernet Switch. Select
-	  BRIDGE to have support for bridge tools.
diff --git a/drivers/staging/fsl-dpaa2/Makefile b/drivers/staging/fsl-dpaa2/Makefile
deleted file mode 100644
index 9645db7689c9..000000000000
--- a/drivers/staging/fsl-dpaa2/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Freescale DataPath Acceleration Architecture Gen2 (DPAA2) drivers
-#
-
-obj-$(CONFIG_FSL_DPAA2_ETHSW)		+= ethsw/
diff --git a/drivers/staging/fsl-dpaa2/ethsw/Makefile b/drivers/staging/fsl-dpaa2/ethsw/Makefile
deleted file mode 100644
index f6f2cf798faf..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Freescale DPAA2 Ethernet Switch
-#
-# Copyright 2014-2017 Freescale Semiconductor Inc.
-# Copyright 2017-2018 NXP
-
-obj-$(CONFIG_FSL_DPAA2_ETHSW) += dpaa2-ethsw.o
-
-dpaa2-ethsw-objs := ethsw.o ethsw-ethtool.o dpsw.o
diff --git a/drivers/staging/fsl-dpaa2/ethsw/README b/drivers/staging/fsl-dpaa2/ethsw/README
deleted file mode 100644
index b48dcbf7c5fb..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/README
+++ /dev/null
@@ -1,106 +0,0 @@
-DPAA2 Ethernet Switch driver
-============================
-
-This file provides documentation for the DPAA2 Ethernet Switch driver
-
-
-Contents
-========
-	Supported Platforms
-	Architecture Overview
-	Creating an Ethernet Switch
-	Features
-
-
-	Supported Platforms
-===================
-This driver provides networking support for Freescale LS2085A, LS2088A
-DPAA2 SoCs.
-
-
-Architecture Overview
-=====================
-The Ethernet Switch in the DPAA2 architecture consists of several hardware
-resources that provide the functionality. These are allocated and
-configured via the Management Complex (MC) portals. MC abstracts most of
-these resources as DPAA2 objects and exposes ABIs through which they can
-be configured and controlled.
-
-For a more detailed description of the DPAA2 architecture and its object
-abstractions see:
-	drivers/staging/fsl-mc/README.txt
-
-The Ethernet Switch is built on top of a Datapath Switch (DPSW) object.
-
-Configuration interface:
-
-          ---------------------
-         | DPAA2 Switch driver |
-          ---------------------
-                   .
-                   .
-              ----------
-             | DPSW API |
-              ----------
-                   .           software
- ================= . ==============
-                   .           hardware
-          ---------------------
-         | MC hardware portals |
-          ---------------------
-                   .
-                   .
-                 ------
-                | DPSW |
-                 ------
-
-Driver uses the switch device driver model and exposes each switch port as
-a network interface, which can be included in a bridge. Traffic switched
-between ports is offloaded into the hardware. Exposed network interfaces
-are not used for I/O, they are used just for configuration. This
-limitation is going to be addressed in the future.
-
-The DPSW can have ports connected to DPNIs or to PHYs via DPMACs.
-
-
- [ethA]     [ethB]     [ethC]     [ethD]     [ethE]     [ethF]
-    :          :          :          :          :          :
-    :          :          :          :          :          :
-[eth drv]  [eth drv]  [                ethsw drv              ]
-    :          :          :          :          :          :        kernel
-========================================================================
-    :          :          :          :          :          :        hardware
- [DPNI]      [DPNI]     [============= DPSW =================]
-    |          |          |          |          |          |
-    |           ----------           |       [DPMAC]    [DPMAC]
-     -------------------------------            |          |
-                                                |          |
-                                              [PHY]      [PHY]
-
-For a more detailed description of the Ethernet switch device driver model
-see:
-	Documentation/networking/switchdev.rst
-
-Creating an Ethernet Switch
-===========================
-A device is created for the switch objects probed on the MC bus. Each DPSW
-has a number of properties which determine the configuration options and
-associated hardware resources.
-
-A DPSW object (and the other DPAA2 objects needed for a DPAA2 switch) can
-be added to a container on the MC bus in one of two ways: statically,
-through a Datapath Layout Binary file (DPL) that is parsed by MC at boot
-time; or created dynamically at runtime, via the DPAA2 objects APIs.
-
-Features
-========
-Driver configures DPSW to perform hardware switching offload of
-unicast/multicast/broadcast (VLAN tagged or untagged) traffic between its
-ports.
-
-It allows configuration of hardware learning, flooding, multicast groups,
-port VLAN configuration and STP state.
-
-Static entries can be added/removed from the FDB.
-
-Hardware statistics for each port are provided through ethtool -S option.
diff --git a/drivers/staging/fsl-dpaa2/ethsw/TODO b/drivers/staging/fsl-dpaa2/ethsw/TODO
deleted file mode 100644
index 4d46857b0b2b..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/TODO
+++ /dev/null
@@ -1,13 +0,0 @@
-* Add I/O capabilities on switch port netdevices. This will allow control
-traffic to reach the CPU.
-* Add ACL to redirect control traffic to CPU.
-* Add support for multiple FDBs and switch port partitioning
-* MC firmware uprev; the DPAA2 objects used by the Ethernet Switch driver
-need to be kept in sync with binary interface changes in MC
-* refine README file
-* cleanup
-
-NOTE: At least first three of the above are required before getting the
-DPAA2 Ethernet Switch driver out of staging. Another requirement is that
-dpio driver is moved to drivers/soc (this is required for I/O).
-
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
deleted file mode 100644
index 9cfd8a8e0197..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ /dev/null
@@ -1,594 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
- *
- */
-
-#ifndef __FSL_DPSW_H
-#define __FSL_DPSW_H
-
-/* Data Path L2-Switch API
- * Contains API for handling DPSW topology and functionality
- */
-
-struct fsl_mc_io;
-
-/**
- * DPSW general definitions
- */
-
-/**
- * Maximum number of traffic class priorities
- */
-#define DPSW_MAX_PRIORITIES	8
-/**
- * Maximum number of interfaces
- */
-#define DPSW_MAX_IF		64
-
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token);
-
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
-
-/**
- * DPSW options
- */
-
-/**
- * Disable flooding
- */
-#define DPSW_OPT_FLOODING_DIS		0x0000000000000001ULL
-/**
- * Disable Multicast
- */
-#define DPSW_OPT_MULTICAST_DIS		0x0000000000000004ULL
-/**
- * Support control interface
- */
-#define DPSW_OPT_CTRL_IF_DIS		0x0000000000000010ULL
-/**
- * Disable flooding metering
- */
-#define DPSW_OPT_FLOODING_METERING_DIS  0x0000000000000020ULL
-/**
- * Enable metering
- */
-#define DPSW_OPT_METERING_EN            0x0000000000000040ULL
-
-/**
- * enum dpsw_component_type - component type of a bridge
- * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an
- *   enterprise VLAN bridge or of a Provider Bridge used
- *   to process C-tagged frames
- * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a
- *   Provider Bridge
- *
- */
-enum dpsw_component_type {
-	DPSW_COMPONENT_TYPE_C_VLAN = 0,
-	DPSW_COMPONENT_TYPE_S_VLAN
-};
-
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token);
-
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token);
-
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
-
-/**
- * DPSW IRQ Index and Events
- */
-
-#define DPSW_IRQ_INDEX_IF		0x0000
-#define DPSW_IRQ_INDEX_L2SW		0x0001
-
-/**
- * IRQ event - Indicates that the link state changed
- */
-#define DPSW_IRQ_EVENT_LINK_CHANGED	0x0001
-
-/**
- * struct dpsw_irq_cfg - IRQ configuration
- * @addr:	Address that must be written to signal a message-based interrupt
- * @val:	Value to write into irq_addr address
- * @irq_num: A user defined number associated with this IRQ
- */
-struct dpsw_irq_cfg {
-	u64 addr;
-	u32 val;
-	int irq_num;
-};
-
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en);
-
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask);
-
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status);
-
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status);
-
-/**
- * struct dpsw_attr - Structure representing DPSW attributes
- * @id: DPSW object ID
- * @options: Enable/Disable DPSW features
- * @max_vlans: Maximum Number of VLANs
- * @max_meters_per_if:  Number of meters per interface
- * @max_fdbs: Maximum Number of FDBs
- * @max_fdb_entries: Number of FDB entries for default FDB table;
- *			0 - indicates default 1024 entries.
- * @fdb_aging_time: Default FDB aging time for default FDB table;
- *			0 - indicates default 300 seconds
- * @max_fdb_mc_groups: Number of multicast groups in each FDB table;
- *			0 - indicates default 32
- * @mem_size: DPSW frame storage memory size
- * @num_ifs: Number of interfaces
- * @num_vlans: Current number of VLANs
- * @num_fdbs: Current number of FDBs
- * @component_type: Component type of this bridge
- */
-struct dpsw_attr {
-	int id;
-	u64 options;
-	u16 max_vlans;
-	u8 max_meters_per_if;
-	u8 max_fdbs;
-	u16 max_fdb_entries;
-	u16 fdb_aging_time;
-	u16 max_fdb_mc_groups;
-	u16 num_ifs;
-	u16 mem_size;
-	u16 num_vlans;
-	u8 num_fdbs;
-	enum dpsw_component_type component_type;
-};
-
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			struct dpsw_attr *attr);
-
-/**
- * enum dpsw_action - Action selection for special/control frames
- * @DPSW_ACTION_DROP: Drop frame
- * @DPSW_ACTION_REDIRECT: Redirect frame to control port
- */
-enum dpsw_action {
-	DPSW_ACTION_DROP = 0,
-	DPSW_ACTION_REDIRECT = 1
-};
-
-/**
- * Enable auto-negotiation
- */
-#define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
-/**
- * Enable half-duplex mode
- */
-#define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
-/**
- * Enable pause frames
- */
-#define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
-/**
- * Enable a-symmetric pause frames
- */
-#define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
-
-/**
- * struct dpsw_link_cfg - Structure representing DPSW link configuration
- * @rate: Rate
- * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
- */
-struct dpsw_link_cfg {
-	u32 rate;
-	u64 options;
-};
-
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 struct dpsw_link_cfg *cfg);
-/**
- * struct dpsw_link_state - Structure representing DPSW link state
- * @rate: Rate
- * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
- * @up: 0 - covers two cases: down and disconnected, 1 - up
- */
-struct dpsw_link_state {
-	u32 rate;
-	u64 options;
-	u8 up;
-};
-
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state);
-
-int dpsw_if_set_flooding(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 u8 en);
-
-int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u16 if_id,
-			  u8 en);
-
-/**
- * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
- * @pcp: Priority Code Point (PCP): a 3-bit field which refers
- *		 to the IEEE 802.1p priority
- * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used
- *		 separately or in conjunction with PCP to indicate frames
- *		 eligible to be dropped in the presence of congestion
- * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN
- *			to which the frame belongs. The hexadecimal values
- *			of 0x000 and 0xFFF are reserved;
- *			all other values may be used as VLAN identifiers,
- *			allowing up to 4,094 VLANs
- */
-struct dpsw_tci_cfg {
-	u8 pcp;
-	u8 dei;
-	u16 vlan_id;
-};
-
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_tci_cfg *cfg);
-
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    struct dpsw_tci_cfg *cfg);
-
-/**
- * enum dpsw_stp_state - Spanning Tree Protocol (STP) states
- * @DPSW_STP_STATE_BLOCKING: Blocking state
- * @DPSW_STP_STATE_LISTENING: Listening state
- * @DPSW_STP_STATE_LEARNING: Learning state
- * @DPSW_STP_STATE_FORWARDING: Forwarding state
- *
- */
-enum dpsw_stp_state {
-	DPSW_STP_STATE_DISABLED = 0,
-	DPSW_STP_STATE_LISTENING = 1,
-	DPSW_STP_STATE_LEARNING = 2,
-	DPSW_STP_STATE_FORWARDING = 3,
-	DPSW_STP_STATE_BLOCKING = 0
-};
-
-/**
- * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration
- * @vlan_id: VLAN ID STP state
- * @state: STP state
- */
-struct dpsw_stp_cfg {
-	u16 vlan_id;
-	enum dpsw_stp_state state;
-};
-
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_stp_cfg *cfg);
-
-/**
- * enum dpsw_accepted_frames - Types of frames to accept
- * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and
- *			priority tagged frames
- * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or
- *			Priority-Tagged frames received on this interface.
- *
- */
-enum dpsw_accepted_frames {
-	DPSW_ADMIT_ALL = 1,
-	DPSW_ADMIT_ONLY_VLAN_TAGGED = 3
-};
-
-/**
- * enum dpsw_counter  - Counters types
- * @DPSW_CNT_ING_FRAME: Counts ingress frames
- * @DPSW_CNT_ING_BYTE: Counts ingress bytes
- * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames
- * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frame
- * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames
- * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes
- * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames
- * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes
- * @DPSW_CNT_EGR_FRAME: Counts egress frames
- * @DPSW_CNT_EGR_BYTE: Counts egress bytes
- * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames
- * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames
- * @DPSW_CNT_ING_NO_BUFF_DISCARD: Counts ingress no buffer discarded frames
- */
-enum dpsw_counter {
-	DPSW_CNT_ING_FRAME = 0x0,
-	DPSW_CNT_ING_BYTE = 0x1,
-	DPSW_CNT_ING_FLTR_FRAME = 0x2,
-	DPSW_CNT_ING_FRAME_DISCARD = 0x3,
-	DPSW_CNT_ING_MCAST_FRAME = 0x4,
-	DPSW_CNT_ING_MCAST_BYTE = 0x5,
-	DPSW_CNT_ING_BCAST_FRAME = 0x6,
-	DPSW_CNT_ING_BCAST_BYTES = 0x7,
-	DPSW_CNT_EGR_FRAME = 0x8,
-	DPSW_CNT_EGR_BYTE = 0x9,
-	DPSW_CNT_EGR_FRAME_DISCARD = 0xa,
-	DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb,
-	DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc,
-};
-
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter);
-
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id);
-
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id);
-
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length);
-
-/**
- * struct dpsw_vlan_cfg - VLAN Configuration
- * @fdb_id: Forwarding Data Base
- */
-struct dpsw_vlan_cfg {
-	u16 fdb_id;
-};
-
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg);
-
-/**
- * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces
- * @num_ifs: The number of interfaces that are assigned to the egress
- *		list for this VLAN
- * @if_id: The set of interfaces that are
- *		assigned to the egress list for this VLAN
- */
-struct dpsw_vlan_if_cfg {
-	u16 num_ifs;
-	u16 if_id[DPSW_MAX_IF];
-};
-
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id);
-
-/**
- * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic
- * @DPSW_FDB_ENTRY_STATIC: Static entry
- * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry
- */
-enum dpsw_fdb_entry_type {
-	DPSW_FDB_ENTRY_STATIC = 0,
-	DPSW_FDB_ENTRY_DINAMIC = 1
-};
-
-/**
- * struct dpsw_fdb_unicast_cfg - Unicast entry configuration
- * @type: Select static or dynamic entry
- * @mac_addr: MAC address
- * @if_egress: Egress interface ID
- */
-struct dpsw_fdb_unicast_cfg {
-	enum dpsw_fdb_entry_type type;
-	u8 mac_addr[6];
-	u16 if_egress;
-};
-
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg);
-
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg);
-
-#define DPSW_FDB_ENTRY_TYPE_DYNAMIC  BIT(0)
-#define DPSW_FDB_ENTRY_TYPE_UNICAST  BIT(1)
-
-/**
- * struct fdb_dump_entry - fdb snapshot entry
- * @mac_addr: MAC address
- * @type: bit0 - DINAMIC(1)/STATIC(0), bit1 - UNICAST(1)/MULTICAST(0)
- * @if_info: unicast - egress interface, multicast - number of egress interfaces
- * @if_mask: multicast - egress interface mask
- */
-struct fdb_dump_entry {
-	u8 mac_addr[6];
-	u8 type;
-	u8 if_info;
-	u8 if_mask[8];
-};
-
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries);
-
-/**
- * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration
- * @type: Select static or dynamic entry
- * @mac_addr: MAC address
- * @num_ifs: Number of external and internal interfaces
- * @if_id: Egress interface IDs
- */
-struct dpsw_fdb_multicast_cfg {
-	enum dpsw_fdb_entry_type type;
-	u8 mac_addr[6];
-	u16 num_ifs;
-	u16 if_id[DPSW_MAX_IF];
-};
-
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg);
-
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg);
-
-/**
- * enum dpsw_fdb_learning_mode - Auto-learning modes
- * @DPSW_FDB_LEARNING_MODE_DIS: Disable Auto-learning
- * @DPSW_FDB_LEARNING_MODE_HW: Enable HW auto-Learning
- * @DPSW_FDB_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
- * @DPSW_FDB_LEARNING_MODE_SECURE: Enable secure learning by CPU
- *
- *	NONE - SECURE LEARNING
- *	SMAC found	DMAC found	CTLU Action
- *	v		v	Forward frame to
- *						1.  DMAC destination
- *	-		v	Forward frame to
- *						1.  DMAC destination
- *						2.  Control interface
- *	v		-	Forward frame to
- *						1.  Flooding list of interfaces
- *	-		-	Forward frame to
- *						1.  Flooding list of interfaces
- *						2.  Control interface
- *	SECURE LEARING
- *	SMAC found	DMAC found	CTLU Action
- *	v		v		Forward frame to
- *						1.  DMAC destination
- *	-		v		Forward frame to
- *						1.  Control interface
- *	v		-		Forward frame to
- *						1.  Flooding list of interfaces
- *	-		-		Forward frame to
- *						1.  Control interface
- */
-enum dpsw_fdb_learning_mode {
-	DPSW_FDB_LEARNING_MODE_DIS = 0,
-	DPSW_FDB_LEARNING_MODE_HW = 1,
-	DPSW_FDB_LEARNING_MODE_NON_SECURE = 2,
-	DPSW_FDB_LEARNING_MODE_SECURE = 3
-};
-
-int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io,
-			       u32 cmd_flags,
-			       u16 token,
-			       u16 fdb_id,
-			       enum dpsw_fdb_learning_mode mode);
-
-/**
- * struct dpsw_fdb_attr - FDB Attributes
- * @max_fdb_entries: Number of FDB entries
- * @fdb_aging_time: Aging time in seconds
- * @learning_mode: Learning mode
- * @num_fdb_mc_groups: Current number of multicast groups
- * @max_fdb_mc_groups: Maximum number of multicast groups
- */
-struct dpsw_fdb_attr {
-	u16 max_fdb_entries;
-	u16 fdb_aging_time;
-	enum dpsw_fdb_learning_mode learning_mode;
-	u16 num_fdb_mc_groups;
-	u16 max_fdb_mc_groups;
-};
-
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver);
-
-int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			      u16 if_id, u8 mac_addr[6]);
-
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
-#endif /* __FSL_DPSW_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
deleted file mode 100644
index 703055e063ff..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ /dev/null
@@ -1,1839 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DPAA2 Ethernet Switch driver
- *
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/interrupt.h>
-#include <linux/msi.h>
-#include <linux/kthread.h>
-#include <linux/workqueue.h>
-
-#include <linux/fsl/mc.h>
-
-#include "ethsw.h"
-
-/* Minimal supported DPSW version */
-#define DPSW_MIN_VER_MAJOR		8
-#define DPSW_MIN_VER_MINOR		1
-
-#define DEFAULT_VLAN_ID			1
-
-static int dpaa2_switch_add_vlan(struct ethsw_core *ethsw, u16 vid)
-{
-	int err;
-
-	struct dpsw_vlan_cfg	vcfg = {
-		.fdb_id = 0,
-	};
-
-	err = dpsw_vlan_add(ethsw->mc_io, 0,
-			    ethsw->dpsw_handle, vid, &vcfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err);
-		return err;
-	}
-	ethsw->vlans[vid] = ETHSW_VLAN_MEMBER;
-
-	return 0;
-}
-
-static bool dpaa2_switch_port_is_up(struct ethsw_port_priv *port_priv)
-{
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_link_state state;
-	int err;
-
-	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     port_priv->idx, &state);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
-		return true;
-	}
-
-	WARN_ONCE(state.up > 1, "Garbage read into link_state");
-
-	return state.up ? true : false;
-}
-
-static int dpaa2_switch_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_tci_cfg tci_cfg = { 0 };
-	bool up;
-	int err, ret;
-
-	err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      port_priv->idx, &tci_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_tci err %d\n", err);
-		return err;
-	}
-
-	tci_cfg.vlan_id = pvid;
-
-	/* Interface needs to be down to change PVID */
-	up = dpaa2_switch_port_is_up(port_priv);
-	if (up) {
-		err = dpsw_if_disable(ethsw->mc_io, 0,
-				      ethsw->dpsw_handle,
-				      port_priv->idx);
-		if (err) {
-			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
-			return err;
-		}
-	}
-
-	err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      port_priv->idx, &tci_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_set_tci err %d\n", err);
-		goto set_tci_error;
-	}
-
-	/* Delete previous PVID info and mark the new one */
-	port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID;
-	port_priv->vlans[pvid] |= ETHSW_VLAN_PVID;
-	port_priv->pvid = pvid;
-
-set_tci_error:
-	if (up) {
-		ret = dpsw_if_enable(ethsw->mc_io, 0,
-				     ethsw->dpsw_handle,
-				     port_priv->idx);
-		if (ret) {
-			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
-			return ret;
-		}
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
-				      u16 vid, u16 flags)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_vlan_if_cfg vcfg;
-	int err;
-
-	if (port_priv->vlans[vid]) {
-		netdev_warn(netdev, "VLAN %d already configured\n", vid);
-		return -EEXIST;
-	}
-
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-	err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err);
-		return err;
-	}
-
-	port_priv->vlans[vid] = ETHSW_VLAN_MEMBER;
-
-	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
-		err = dpsw_vlan_add_if_untagged(ethsw->mc_io, 0,
-						ethsw->dpsw_handle,
-						vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_add_if_untagged err %d\n", err);
-			return err;
-		}
-		port_priv->vlans[vid] |= ETHSW_VLAN_UNTAGGED;
-	}
-
-	if (flags & BRIDGE_VLAN_INFO_PVID) {
-		err = dpaa2_switch_port_set_pvid(port_priv, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_set_learning(struct ethsw_core *ethsw, bool enable)
-{
-	enum dpsw_fdb_learning_mode learn_mode;
-	int err;
-
-	if (enable)
-		learn_mode = DPSW_FDB_LEARNING_MODE_HW;
-	else
-		learn_mode = DPSW_FDB_LEARNING_MODE_DIS;
-
-	err = dpsw_fdb_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
-					 learn_mode);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_fdb_set_learning_mode err %d\n", err);
-		return err;
-	}
-	ethsw->learning = enable;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_set_flood(struct ethsw_port_priv *port_priv, bool enable)
-{
-	int err;
-
-	err = dpsw_if_set_flooding(port_priv->ethsw_data->mc_io, 0,
-				   port_priv->ethsw_data->dpsw_handle,
-				   port_priv->idx, enable);
-	if (err) {
-		netdev_err(port_priv->netdev,
-			   "dpsw_if_set_flooding err %d\n", err);
-		return err;
-	}
-	port_priv->flood = enable;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
-{
-	struct dpsw_stp_cfg stp_cfg = {
-		.state = state,
-	};
-	int err;
-	u16 vid;
-
-	if (!netif_running(port_priv->netdev) || state == port_priv->stp_state)
-		return 0;	/* Nothing to do */
-
-	for (vid = 0; vid <= VLAN_VID_MASK; vid++) {
-		if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
-			stp_cfg.vlan_id = vid;
-			err = dpsw_if_set_stp(port_priv->ethsw_data->mc_io, 0,
-					      port_priv->ethsw_data->dpsw_handle,
-					      port_priv->idx, &stp_cfg);
-			if (err) {
-				netdev_err(port_priv->netdev,
-					   "dpsw_if_set_stp err %d\n", err);
-				return err;
-			}
-		}
-	}
-
-	port_priv->stp_state = state;
-
-	return 0;
-}
-
-static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid)
-{
-	struct ethsw_port_priv *ppriv_local = NULL;
-	int i, err;
-
-	if (!ethsw->vlans[vid])
-		return -ENOENT;
-
-	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, vid);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_vlan_remove err %d\n", err);
-		return err;
-	}
-	ethsw->vlans[vid] = 0;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		ppriv_local = ethsw->ports[i];
-		ppriv_local->vlans[vid] = 0;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_unicast_cfg entry = {0};
-	int err;
-
-	entry.if_egress = port_priv->idx;
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	ether_addr_copy(entry.mac_addr, addr);
-
-	err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0,
-				   port_priv->ethsw_data->dpsw_handle,
-				   0, &entry);
-	if (err)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_add_unicast err %d\n", err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_unicast_cfg entry = {0};
-	int err;
-
-	entry.if_egress = port_priv->idx;
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	ether_addr_copy(entry.mac_addr, addr);
-
-	err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0,
-				      port_priv->ethsw_data->dpsw_handle,
-				      0, &entry);
-	/* Silently discard error for calling multiple times the del command */
-	if (err && err != -ENXIO)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_remove_unicast err %d\n", err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_multicast_cfg entry = {0};
-	int err;
-
-	ether_addr_copy(entry.mac_addr, addr);
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	entry.num_ifs = 1;
-	entry.if_id[0] = port_priv->idx;
-
-	err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     0, &entry);
-	/* Silently discard error for calling multiple times the add command */
-	if (err && err != -ENXIO)
-		netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n",
-			   err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_multicast_cfg entry = {0};
-	int err;
-
-	ether_addr_copy(entry.mac_addr, addr);
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	entry.num_ifs = 1;
-	entry.if_id[0] = port_priv->idx;
-
-	err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0,
-					port_priv->ethsw_data->dpsw_handle,
-					0, &entry);
-	/* Silently discard error for calling multiple times the del command */
-	if (err && err != -ENAVAIL)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_remove_multicast err %d\n", err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-				     struct net_device *dev, const unsigned char *addr,
-				     u16 vid, u16 flags,
-				     struct netlink_ext_ack *extack)
-{
-	if (is_unicast_ether_addr(addr))
-		return dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
-						    addr);
-	else
-		return dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
-						    addr);
-}
-
-static int dpaa2_switch_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-				     struct net_device *dev,
-				     const unsigned char *addr, u16 vid)
-{
-	if (is_unicast_ether_addr(addr))
-		return dpaa2_switch_port_fdb_del_uc(netdev_priv(dev),
-						    addr);
-	else
-		return dpaa2_switch_port_fdb_del_mc(netdev_priv(dev),
-						    addr);
-}
-
-static void dpaa2_switch_port_get_stats(struct net_device *netdev,
-					struct rtnl_link_stats64 *stats)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	u64 tmp;
-	int err;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FRAME, &stats->rx_packets);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_FRAME, &stats->tx_packets);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_BYTE, &stats->rx_bytes);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_BYTE, &stats->tx_bytes);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FRAME_DISCARD,
-				  &stats->rx_dropped);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FLTR_FRAME,
-				  &tmp);
-	if (err)
-		goto error;
-	stats->rx_dropped += tmp;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_FRAME_DISCARD,
-				  &stats->tx_dropped);
-	if (err)
-		goto error;
-
-	return;
-
-error:
-	netdev_err(netdev, "dpsw_if_get_counter err %d\n", err);
-}
-
-static bool dpaa2_switch_port_has_offload_stats(const struct net_device *netdev,
-						int attr_id)
-{
-	return (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT);
-}
-
-static int dpaa2_switch_port_get_offload_stats(int attr_id,
-					       const struct net_device *netdev,
-					       void *sp)
-{
-	switch (attr_id) {
-	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
-		dpaa2_switch_port_get_stats((struct net_device *)netdev, sp);
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	err = dpsw_if_set_max_frame_length(port_priv->ethsw_data->mc_io,
-					   0,
-					   port_priv->ethsw_data->dpsw_handle,
-					   port_priv->idx,
-					   (u16)ETHSW_L2_MAX_FRM(mtu));
-	if (err) {
-		netdev_err(netdev,
-			   "dpsw_if_set_max_frame_length() err %d\n", err);
-		return err;
-	}
-
-	netdev->mtu = mtu;
-	return 0;
-}
-
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct dpsw_link_state state;
-	int err;
-
-	/* Interrupts are received even though no one issued an 'ifconfig up'
-	 * on the switch interface. Ignore these link state update interrupts
-	 */
-	if (!netif_running(netdev))
-		return 0;
-
-	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     port_priv->idx, &state);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
-		return err;
-	}
-
-	WARN_ONCE(state.up > 1, "Garbage read into link_state");
-
-	if (state.up != port_priv->link_state) {
-		if (state.up)
-			netif_carrier_on(netdev);
-		else
-			netif_carrier_off(netdev);
-		port_priv->link_state = state.up;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_open(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	/* No need to allow Tx as control interface is disabled */
-	netif_tx_stop_all_queues(netdev);
-
-	/* Explicitly set carrier off, otherwise
-	 * netif_carrier_ok() will return true and cause 'ip link show'
-	 * to report the LOWER_UP flag, even though the link
-	 * notification wasn't even received.
-	 */
-	netif_carrier_off(netdev);
-
-	err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
-			     port_priv->ethsw_data->dpsw_handle,
-			     port_priv->idx);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_enable err %d\n", err);
-		return err;
-	}
-
-	/* sync carrier state */
-	err = dpaa2_switch_port_carrier_state_sync(netdev);
-	if (err) {
-		netdev_err(netdev,
-			   "dpaa2_switch_port_carrier_state_sync err %d\n", err);
-		goto err_carrier_sync;
-	}
-
-	return 0;
-
-err_carrier_sync:
-	dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-			port_priv->ethsw_data->dpsw_handle,
-			port_priv->idx);
-	return err;
-}
-
-static int dpaa2_switch_port_stop(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-			      port_priv->ethsw_data->dpsw_handle,
-			      port_priv->idx);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_disable err %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-static netdev_tx_t dpaa2_switch_port_dropframe(struct sk_buff *skb,
-					       struct net_device *netdev)
-{
-	/* we don't support I/O for now, drop the frame */
-	dev_kfree_skb_any(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static int dpaa2_switch_port_parent_id(struct net_device *dev,
-				       struct netdev_phys_item_id *ppid)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(dev);
-
-	ppid->id_len = 1;
-	ppid->id[0] = port_priv->ethsw_data->dev_id;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_get_phys_name(struct net_device *netdev, char *name,
-					   size_t len)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	err = snprintf(name, len, "p%d", port_priv->idx);
-	if (err >= len)
-		return -EINVAL;
-
-	return 0;
-}
-
-struct ethsw_dump_ctx {
-	struct net_device *dev;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-	int idx;
-};
-
-static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
-				    struct ethsw_dump_ctx *dump)
-{
-	int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
-	u32 portid = NETLINK_CB(dump->cb->skb).portid;
-	u32 seq = dump->cb->nlh->nlmsg_seq;
-	struct nlmsghdr *nlh;
-	struct ndmsg *ndm;
-
-	if (dump->idx < dump->cb->args[2])
-		goto skip;
-
-	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
-			sizeof(*ndm), NLM_F_MULTI);
-	if (!nlh)
-		return -EMSGSIZE;
-
-	ndm = nlmsg_data(nlh);
-	ndm->ndm_family  = AF_BRIDGE;
-	ndm->ndm_pad1    = 0;
-	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags   = NTF_SELF;
-	ndm->ndm_type    = 0;
-	ndm->ndm_ifindex = dump->dev->ifindex;
-	ndm->ndm_state   = is_dynamic ? NUD_REACHABLE : NUD_NOARP;
-
-	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac_addr))
-		goto nla_put_failure;
-
-	nlmsg_end(dump->skb, nlh);
-
-skip:
-	dump->idx++;
-	return 0;
-
-nla_put_failure:
-	nlmsg_cancel(dump->skb, nlh);
-	return -EMSGSIZE;
-}
-
-static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
-					     struct ethsw_port_priv *port_priv)
-{
-	int idx = port_priv->idx;
-	int valid;
-
-	if (entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
-		valid = entry->if_info == port_priv->idx;
-	else
-		valid = entry->if_mask[idx / 8] & BIT(idx % 8);
-
-	return valid;
-}
-
-static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-				      struct net_device *net_dev,
-				      struct net_device *filter_dev, int *idx)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct device *dev = net_dev->dev.parent;
-	struct fdb_dump_entry *fdb_entries;
-	struct fdb_dump_entry fdb_entry;
-	struct ethsw_dump_ctx dump = {
-		.dev = net_dev,
-		.skb = skb,
-		.cb = cb,
-		.idx = *idx,
-	};
-	dma_addr_t fdb_dump_iova;
-	u16 num_fdb_entries;
-	u32 fdb_dump_size;
-	int err = 0, i;
-	u8 *dma_mem;
-
-	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
-	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
-	if (!dma_mem)
-		return -ENOMEM;
-
-	fdb_dump_iova = dma_map_single(dev, dma_mem, fdb_dump_size,
-				       DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, fdb_dump_iova)) {
-		netdev_err(net_dev, "dma_map_single() failed\n");
-		err = -ENOMEM;
-		goto err_map;
-	}
-
-	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
-			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
-	if (err) {
-		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
-		goto err_dump;
-	}
-
-	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
-
-	fdb_entries = (struct fdb_dump_entry *)dma_mem;
-	for (i = 0; i < num_fdb_entries; i++) {
-		fdb_entry = fdb_entries[i];
-
-		if (!dpaa2_switch_port_fdb_valid_entry(&fdb_entry, port_priv))
-			continue;
-
-		err = dpaa2_switch_fdb_dump_nl(&fdb_entry, &dump);
-		if (err)
-			goto end;
-	}
-
-end:
-	*idx = dump.idx;
-
-	kfree(dma_mem);
-
-	return 0;
-
-err_dump:
-	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_TO_DEVICE);
-err_map:
-	kfree(dma_mem);
-	return err;
-}
-
-static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *net_dev = port_priv->netdev;
-	struct device *dev = net_dev->dev.parent;
-	u8 mac_addr[ETH_ALEN];
-	int err;
-
-	if (!(ethsw->features & ETHSW_FEATURE_MAC_ADDR))
-		return 0;
-
-	/* Get firmware address, if any */
-	err = dpsw_if_get_port_mac_addr(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					port_priv->idx, mac_addr);
-	if (err) {
-		dev_err(dev, "dpsw_if_get_port_mac_addr() failed\n");
-		return err;
-	}
-
-	/* First check if firmware has any address configured by bootloader */
-	if (!is_zero_ether_addr(mac_addr)) {
-		memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
-	} else {
-		/* No MAC address configured, fill in net_dev->dev_addr
-		 * with a random one
-		 */
-		eth_hw_addr_random(net_dev);
-		dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n");
-
-		/* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all
-		 * practical purposes, this will be our "permanent" mac address,
-		 * at least until the next reboot. This move will also permit
-		 * register_netdevice() to properly fill up net_dev->perm_addr.
-		 */
-		net_dev->addr_assign_type = NET_ADDR_PERM;
-	}
-
-	return 0;
-}
-
-static const struct net_device_ops dpaa2_switch_port_ops = {
-	.ndo_open		= dpaa2_switch_port_open,
-	.ndo_stop		= dpaa2_switch_port_stop,
-
-	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_get_stats64	= dpaa2_switch_port_get_stats,
-	.ndo_change_mtu		= dpaa2_switch_port_change_mtu,
-	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
-	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
-	.ndo_fdb_add		= dpaa2_switch_port_fdb_add,
-	.ndo_fdb_del		= dpaa2_switch_port_fdb_del,
-	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
-
-	.ndo_start_xmit		= dpaa2_switch_port_dropframe,
-	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
-	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
-};
-
-static bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
-					struct notifier_block *nb)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	if (netdev->netdev_ops == &dpaa2_switch_port_ops &&
-	    (!nb || &port_priv->ethsw_data->port_nb == nb ||
-	     &port_priv->ethsw_data->port_switchdev_nb == nb ||
-	     &port_priv->ethsw_data->port_switchdevb_nb == nb))
-		return true;
-
-	return false;
-}
-
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
-		dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
-	}
-}
-
-static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
-{
-	struct device *dev = (struct device *)arg;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
-	/* Mask the events and the if_id reserved bits to be cleared on read */
-	u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
-	int err;
-
-	err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, &status);
-	if (err) {
-		dev_err(dev, "Can't get irq status (err %d)\n", err);
-
-		err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					    DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
-		if (err)
-			dev_err(dev, "Can't clear irq status (err %d)\n", err);
-		goto out;
-	}
-
-	if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
-		dpaa2_switch_links_state_update(ethsw);
-
-out:
-	return IRQ_HANDLED;
-}
-
-static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED;
-	struct fsl_mc_device_irq *irq;
-	int err;
-
-	err = fsl_mc_allocate_irqs(sw_dev);
-	if (err) {
-		dev_err(dev, "MC irqs allocation failed\n");
-		return err;
-	}
-
-	if (WARN_ON(sw_dev->obj_desc.irq_count != DPSW_IRQ_NUM)) {
-		err = -EINVAL;
-		goto free_irq;
-	}
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 0);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
-		goto free_irq;
-	}
-
-	irq = sw_dev->irqs[DPSW_IRQ_INDEX_IF];
-
-	err = devm_request_threaded_irq(dev, irq->msi_desc->irq,
-					NULL,
-					dpaa2_switch_irq0_handler_thread,
-					IRQF_NO_SUSPEND | IRQF_ONESHOT,
-					dev_name(dev), dev);
-	if (err) {
-		dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
-		goto free_irq;
-	}
-
-	err = dpsw_set_irq_mask(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				DPSW_IRQ_INDEX_IF, mask);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_mask(): %d\n", err);
-		goto free_devm_irq;
-	}
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 1);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_enable(): %d\n", err);
-		goto free_devm_irq;
-	}
-
-	return 0;
-
-free_devm_irq:
-	devm_free_irq(dev, irq->msi_desc->irq, dev);
-free_irq:
-	fsl_mc_free_irqs(sw_dev);
-	return err;
-}
-
-static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 0);
-	if (err)
-		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
-
-	fsl_mc_free_irqs(sw_dev);
-}
-
-static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
-						u8 state)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	return dpaa2_switch_port_set_stp_state(port_priv, state);
-}
-
-static int
-dpaa2_switch_port_attr_br_flags_pre_set(struct net_device *netdev,
-					struct switchdev_brport_flags flags)
-{
-	if (flags.mask & ~(BR_LEARNING | BR_FLOOD))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int
-dpaa2_switch_port_attr_br_flags_set(struct net_device *netdev,
-				    struct switchdev_brport_flags flags)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err = 0;
-
-	if (flags.mask & BR_LEARNING) {
-		/* Learning is enabled per switch */
-		err = dpaa2_switch_set_learning(port_priv->ethsw_data,
-						!!(flags.val & BR_LEARNING));
-		if (err)
-			return err;
-	}
-
-	if (flags.mask & BR_FLOOD) {
-		err = dpaa2_switch_port_set_flood(port_priv,
-						  !!(flags.val & BR_FLOOD));
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_attr_set(struct net_device *netdev,
-				      const struct switchdev_attr *attr)
-{
-	int err = 0;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
-		err = dpaa2_switch_port_attr_stp_state_set(netdev,
-							   attr->u.stp_state);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
-		err = dpaa2_switch_port_attr_br_flags_pre_set(netdev,
-							      attr->u.brport_flags);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-		err = dpaa2_switch_port_attr_br_flags_set(netdev,
-							  attr->u.brport_flags);
-		break;
-	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-		/* VLANs are supported by default  */
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_vlans_add(struct net_device *netdev,
-				       const struct switchdev_obj_port_vlan *vlan)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_attr *attr = &ethsw->sw_attr;
-	int err = 0;
-
-	/* Make sure that the VLAN is not already configured
-	 * on the switch port
-	 */
-	if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER)
-		return -EEXIST;
-
-	/* Check if there is space for a new VLAN */
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
-		return err;
-	}
-	if (attr->max_vlans - attr->num_vlans < 1)
-		return -ENOSPC;
-
-	/* Check if there is space for a new VLAN */
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
-		return err;
-	}
-	if (attr->max_vlans - attr->num_vlans < 1)
-		return -ENOSPC;
-
-	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
-		/* this is a new VLAN */
-		err = dpaa2_switch_add_vlan(port_priv->ethsw_data, vlan->vid);
-		if (err)
-			return err;
-
-		port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL;
-	}
-
-	return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags);
-}
-
-static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc,
-					    const unsigned char *addr)
-{
-	struct netdev_hw_addr_list *list = (is_uc) ? &netdev->uc : &netdev->mc;
-	struct netdev_hw_addr *ha;
-
-	netif_addr_lock_bh(netdev);
-	list_for_each_entry(ha, &list->list, list) {
-		if (ether_addr_equal(ha->addr, addr)) {
-			netif_addr_unlock_bh(netdev);
-			return 1;
-		}
-	}
-	netif_addr_unlock_bh(netdev);
-	return 0;
-}
-
-static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
-				     const struct switchdev_obj_port_mdb *mdb)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	/* Check if address is already set on this port */
-	if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
-		return -EEXIST;
-
-	err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
-	if (err)
-		return err;
-
-	err = dev_mc_add(netdev, mdb->addr);
-	if (err) {
-		netdev_err(netdev, "dev_mc_add err %d\n", err);
-		dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_obj_add(struct net_device *netdev,
-				     const struct switchdev_obj *obj)
-{
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dpaa2_switch_port_vlans_add(netdev,
-						  SWITCHDEV_OBJ_PORT_VLAN(obj));
-		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dpaa2_switch_port_mdb_add(netdev,
-						SWITCHDEV_OBJ_PORT_MDB(obj));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_vlan_if_cfg vcfg;
-	int i, err;
-
-	if (!port_priv->vlans[vid])
-		return -ENOENT;
-
-	if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) {
-		err = dpaa2_switch_port_set_pvid(port_priv, 0);
-		if (err)
-			return err;
-	}
-
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-	if (port_priv->vlans[vid] & ETHSW_VLAN_UNTAGGED) {
-		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0,
-						   ethsw->dpsw_handle,
-						   vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_remove_if_untagged err %d\n",
-				   err);
-		}
-		port_priv->vlans[vid] &= ~ETHSW_VLAN_UNTAGGED;
-	}
-
-	if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
-		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					  vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_remove_if err %d\n", err);
-			return err;
-		}
-		port_priv->vlans[vid] &= ~ETHSW_VLAN_MEMBER;
-
-		/* Delete VLAN from switch if it is no longer configured on
-		 * any port
-		 */
-		for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-			if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
-				return 0; /* Found a port member in VID */
-
-		ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL;
-
-		err = dpaa2_switch_dellink(ethsw, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_vlans_del(struct net_device *netdev,
-				       const struct switchdev_obj_port_vlan *vlan)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	if (netif_is_bridge_master(vlan->obj.orig_dev))
-		return -EOPNOTSUPP;
-
-	return dpaa2_switch_port_del_vlan(port_priv, vlan->vid);
-}
-
-static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
-				     const struct switchdev_obj_port_mdb *mdb)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
-		return -ENOENT;
-
-	err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
-	if (err)
-		return err;
-
-	err = dev_mc_del(netdev, mdb->addr);
-	if (err) {
-		netdev_err(netdev, "dev_mc_del err %d\n", err);
-		return err;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_obj_del(struct net_device *netdev,
-				     const struct switchdev_obj *obj)
-{
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
-		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dpaa2_switch_port_mdb_del(netdev, SWITCHDEV_OBJ_PORT_MDB(obj));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-	return err;
-}
-
-static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
-					    struct switchdev_notifier_port_attr_info
-					    *port_attr_info)
-{
-	int err;
-
-	err = dpaa2_switch_port_attr_set(netdev, port_attr_info->attr);
-
-	port_attr_info->handled = true;
-	return notifier_from_errno(err);
-}
-
-/* For the moment, only flood setting needs to be updated */
-static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
-					 struct net_device *upper_dev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct ethsw_port_priv *other_port_priv;
-	struct net_device *other_dev;
-	struct list_head *iter;
-	int i, err;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-		if (ethsw->ports[i]->bridge_dev &&
-		    (ethsw->ports[i]->bridge_dev != upper_dev)) {
-			netdev_err(netdev,
-				   "Only one bridge supported per DPSW object!\n");
-			return -EINVAL;
-		}
-
-	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
-		if (!dpaa2_switch_port_dev_check(other_dev, NULL))
-			continue;
-
-		other_port_priv = netdev_priv(other_dev);
-		if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
-			netdev_err(netdev,
-				   "Interface from a different DPSW is in the bridge already!\n");
-			return -EINVAL;
-		}
-	}
-
-	/* Enable flooding */
-	err = dpaa2_switch_port_set_flood(port_priv, 1);
-	if (!err)
-		port_priv->bridge_dev = upper_dev;
-
-	return err;
-}
-
-static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	/* Disable flooding */
-	err = dpaa2_switch_port_set_flood(port_priv, 0);
-	if (!err)
-		port_priv->bridge_dev = NULL;
-
-	return err;
-}
-
-static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
-					     unsigned long event, void *ptr)
-{
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-	struct netdev_notifier_changeupper_info *info = ptr;
-	struct net_device *upper_dev;
-	int err = 0;
-
-	if (!dpaa2_switch_port_dev_check(netdev, nb))
-		return NOTIFY_DONE;
-
-	/* Handle just upper dev link/unlink for the moment */
-	if (event == NETDEV_CHANGEUPPER) {
-		upper_dev = info->upper_dev;
-		if (netif_is_bridge_master(upper_dev)) {
-			if (info->linking)
-				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
-			else
-				err = dpaa2_switch_port_bridge_leave(netdev);
-		}
-	}
-
-	return notifier_from_errno(err);
-}
-
-struct ethsw_switchdev_event_work {
-	struct work_struct work;
-	struct switchdev_notifier_fdb_info fdb_info;
-	struct net_device *dev;
-	unsigned long event;
-};
-
-static void dpaa2_switch_event_work(struct work_struct *work)
-{
-	struct ethsw_switchdev_event_work *switchdev_work =
-		container_of(work, struct ethsw_switchdev_event_work, work);
-	struct net_device *dev = switchdev_work->dev;
-	struct switchdev_notifier_fdb_info *fdb_info;
-	int err;
-
-	rtnl_lock();
-	fdb_info = &switchdev_work->fdb_info;
-
-	switch (switchdev_work->event) {
-	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-		if (!fdb_info->added_by_user)
-			break;
-		if (is_unicast_ether_addr(fdb_info->addr))
-			err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
-							   fdb_info->addr);
-		else
-			err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
-							   fdb_info->addr);
-		if (err)
-			break;
-		fdb_info->offloaded = true;
-		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
-					 &fdb_info->info, NULL);
-		break;
-	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		if (!fdb_info->added_by_user)
-			break;
-		if (is_unicast_ether_addr(fdb_info->addr))
-			dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
-		else
-			dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
-		break;
-	}
-
-	rtnl_unlock();
-	kfree(switchdev_work->fdb_info.addr);
-	kfree(switchdev_work);
-	dev_put(dev);
-}
-
-/* Called under rcu_read_lock() */
-static int dpaa2_switch_port_event(struct notifier_block *nb,
-				   unsigned long event, void *ptr)
-{
-	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
-	struct ethsw_port_priv *port_priv = netdev_priv(dev);
-	struct ethsw_switchdev_event_work *switchdev_work;
-	struct switchdev_notifier_fdb_info *fdb_info = ptr;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-
-	if (!dpaa2_switch_port_dev_check(dev, nb))
-		return NOTIFY_DONE;
-
-	if (event == SWITCHDEV_PORT_ATTR_SET)
-		return dpaa2_switch_port_attr_set_event(dev, ptr);
-
-	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
-	if (!switchdev_work)
-		return NOTIFY_BAD;
-
-	INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
-	switchdev_work->dev = dev;
-	switchdev_work->event = event;
-
-	switch (event) {
-	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		memcpy(&switchdev_work->fdb_info, ptr,
-		       sizeof(switchdev_work->fdb_info));
-		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
-		if (!switchdev_work->fdb_info.addr)
-			goto err_addr_alloc;
-
-		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
-				fdb_info->addr);
-
-		/* Take a reference on the device to avoid being freed. */
-		dev_hold(dev);
-		break;
-	default:
-		kfree(switchdev_work);
-		return NOTIFY_DONE;
-	}
-
-	queue_work(ethsw->workqueue, &switchdev_work->work);
-
-	return NOTIFY_DONE;
-
-err_addr_alloc:
-	kfree(switchdev_work);
-	return NOTIFY_BAD;
-}
-
-static int dpaa2_switch_port_obj_event(unsigned long event,
-				       struct net_device *netdev,
-				       struct switchdev_notifier_port_obj_info *port_obj_info)
-{
-	int err = -EOPNOTSUPP;
-
-	switch (event) {
-	case SWITCHDEV_PORT_OBJ_ADD:
-		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
-		break;
-	case SWITCHDEV_PORT_OBJ_DEL:
-		err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
-		break;
-	}
-
-	port_obj_info->handled = true;
-	return notifier_from_errno(err);
-}
-
-static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
-					    unsigned long event, void *ptr)
-{
-	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
-
-	if (!dpaa2_switch_port_dev_check(dev, nb))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case SWITCHDEV_PORT_OBJ_ADD:
-	case SWITCHDEV_PORT_OBJ_DEL:
-		return dpaa2_switch_port_obj_event(event, dev, ptr);
-	case SWITCHDEV_PORT_ATTR_SET:
-		return dpaa2_switch_port_attr_set_event(dev, ptr);
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int dpaa2_switch_register_notifier(struct device *dev)
-{
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	ethsw->port_nb.notifier_call = dpaa2_switch_port_netdevice_event;
-	err = register_netdevice_notifier(&ethsw->port_nb);
-	if (err) {
-		dev_err(dev, "Failed to register netdev notifier\n");
-		return err;
-	}
-
-	ethsw->port_switchdev_nb.notifier_call = dpaa2_switch_port_event;
-	err = register_switchdev_notifier(&ethsw->port_switchdev_nb);
-	if (err) {
-		dev_err(dev, "Failed to register switchdev notifier\n");
-		goto err_switchdev_nb;
-	}
-
-	ethsw->port_switchdevb_nb.notifier_call = dpaa2_switch_port_blocking_event;
-	err = register_switchdev_blocking_notifier(&ethsw->port_switchdevb_nb);
-	if (err) {
-		dev_err(dev, "Failed to register switchdev blocking notifier\n");
-		goto err_switchdev_blocking_nb;
-	}
-
-	return 0;
-
-err_switchdev_blocking_nb:
-	unregister_switchdev_notifier(&ethsw->port_switchdev_nb);
-err_switchdev_nb:
-	unregister_netdevice_notifier(&ethsw->port_nb);
-	return err;
-}
-
-static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
-{
-	ethsw->features = 0;
-
-	if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
-		ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
-}
-
-static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	struct dpsw_stp_cfg stp_cfg;
-	int err;
-	u16 i;
-
-	ethsw->dev_id = sw_dev->obj_desc.id;
-
-	err = dpsw_open(ethsw->mc_io, 0, ethsw->dev_id, &ethsw->dpsw_handle);
-	if (err) {
-		dev_err(dev, "dpsw_open err %d\n", err);
-		return err;
-	}
-
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		dev_err(dev, "dpsw_get_attributes err %d\n", err);
-		goto err_close;
-	}
-
-	err = dpsw_get_api_version(ethsw->mc_io, 0,
-				   &ethsw->major,
-				   &ethsw->minor);
-	if (err) {
-		dev_err(dev, "dpsw_get_api_version err %d\n", err);
-		goto err_close;
-	}
-
-	/* Minimum supported DPSW version check */
-	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
-	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
-	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
-		dev_err(dev, "DPSW version %d:%d not supported. Use %d.%d or greater.\n",
-			ethsw->major,
-			ethsw->minor,
-			DPSW_MIN_VER_MAJOR, DPSW_MIN_VER_MINOR);
-		err = -ENOTSUPP;
-		goto err_close;
-	}
-
-	dpaa2_switch_detect_features(ethsw);
-
-	err = dpsw_reset(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err) {
-		dev_err(dev, "dpsw_reset err %d\n", err);
-		goto err_close;
-	}
-
-	err = dpsw_fdb_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
-					 DPSW_FDB_LEARNING_MODE_HW);
-	if (err) {
-		dev_err(dev, "dpsw_fdb_set_learning_mode err %d\n", err);
-		goto err_close;
-	}
-
-	stp_cfg.vlan_id = DEFAULT_VLAN_ID;
-	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
-				      &stp_cfg);
-		if (err) {
-			dev_err(dev, "dpsw_if_set_stp err %d for port %d\n",
-				err, i);
-			goto err_close;
-		}
-
-		err = dpsw_if_set_broadcast(ethsw->mc_io, 0,
-					    ethsw->dpsw_handle, i, 1);
-		if (err) {
-			dev_err(dev,
-				"dpsw_if_set_broadcast err %d for port %d\n",
-				err, i);
-			goto err_close;
-		}
-	}
-
-	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
-						   WQ_MEM_RECLAIM, "ethsw",
-						   ethsw->sw_attr.id);
-	if (!ethsw->workqueue) {
-		err = -ENOMEM;
-		goto err_close;
-	}
-
-	err = dpaa2_switch_register_notifier(dev);
-	if (err)
-		goto err_destroy_ordered_workqueue;
-
-	return 0;
-
-err_destroy_ordered_workqueue:
-	destroy_workqueue(ethsw->workqueue);
-
-err_close:
-	dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	return err;
-}
-
-static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
-{
-	struct net_device *netdev = port_priv->netdev;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_vlan_if_cfg vcfg;
-	int err;
-
-	/* Switch starts with all ports configured to VLAN 1. Need to
-	 * remove this setting to allow configuration at bridge join
-	 */
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-
-	err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					   DEFAULT_VLAN_ID, &vcfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_vlan_remove_if_untagged err %d\n",
-			   err);
-		return err;
-	}
-
-	err = dpaa2_switch_port_set_pvid(port_priv, 0);
-	if (err)
-		return err;
-
-	err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DEFAULT_VLAN_ID, &vcfg);
-	if (err)
-		netdev_err(netdev, "dpsw_vlan_remove_if err %d\n", err);
-
-	return err;
-}
-
-static void dpaa2_switch_unregister_notifier(struct device *dev)
-{
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	struct notifier_block *nb;
-	int err;
-
-	nb = &ethsw->port_switchdevb_nb;
-	err = unregister_switchdev_blocking_notifier(nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister switchdev blocking notifier (%d)\n",
-			err);
-
-	err = unregister_switchdev_notifier(&ethsw->port_switchdev_nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister switchdev notifier (%d)\n", err);
-
-	err = unregister_netdevice_notifier(&ethsw->port_nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister netdev notifier (%d)\n", err);
-}
-
-static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	dpaa2_switch_unregister_notifier(dev);
-
-	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err)
-		dev_warn(dev, "dpsw_close err %d\n", err);
-}
-
-static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
-{
-	struct ethsw_port_priv *port_priv;
-	struct ethsw_core *ethsw;
-	struct device *dev;
-	int i;
-
-	dev = &sw_dev->dev;
-	ethsw = dev_get_drvdata(dev);
-
-	dpaa2_switch_teardown_irqs(sw_dev);
-
-	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		port_priv = ethsw->ports[i];
-		unregister_netdev(port_priv->netdev);
-		free_netdev(port_priv->netdev);
-	}
-	kfree(ethsw->ports);
-
-	dpaa2_switch_takedown(sw_dev);
-
-	destroy_workqueue(ethsw->workqueue);
-
-	fsl_mc_portal_free(ethsw->mc_io);
-
-	kfree(ethsw);
-
-	dev_set_drvdata(dev, NULL);
-
-	return 0;
-}
-
-static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
-				   u16 port_idx)
-{
-	struct ethsw_port_priv *port_priv;
-	struct device *dev = ethsw->dev;
-	struct net_device *port_netdev;
-	int err;
-
-	port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
-	if (!port_netdev) {
-		dev_err(dev, "alloc_etherdev error\n");
-		return -ENOMEM;
-	}
-
-	port_priv = netdev_priv(port_netdev);
-	port_priv->netdev = port_netdev;
-	port_priv->ethsw_data = ethsw;
-
-	port_priv->idx = port_idx;
-	port_priv->stp_state = BR_STATE_FORWARDING;
-
-	/* Flooding is implicitly enabled */
-	port_priv->flood = true;
-
-	SET_NETDEV_DEV(port_netdev, dev);
-	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
-	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
-
-	/* Set MTU limits */
-	port_netdev->min_mtu = ETH_MIN_MTU;
-	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
-
-	err = dpaa2_switch_port_init(port_priv, port_idx);
-	if (err)
-		goto err_port_probe;
-
-	err = dpaa2_switch_port_set_mac_addr(port_priv);
-	if (err)
-		goto err_port_probe;
-
-	err = register_netdev(port_netdev);
-	if (err < 0) {
-		dev_err(dev, "register_netdev error %d\n", err);
-		goto err_port_probe;
-	}
-
-	ethsw->ports[port_idx] = port_priv;
-
-	return 0;
-
-err_port_probe:
-	free_netdev(port_netdev);
-
-	return err;
-}
-
-static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw;
-	int i, err;
-
-	/* Allocate switch core*/
-	ethsw = kzalloc(sizeof(*ethsw), GFP_KERNEL);
-
-	if (!ethsw)
-		return -ENOMEM;
-
-	ethsw->dev = dev;
-	dev_set_drvdata(dev, ethsw);
-
-	err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
-				     &ethsw->mc_io);
-	if (err) {
-		if (err == -ENXIO)
-			err = -EPROBE_DEFER;
-		else
-			dev_err(dev, "fsl_mc_portal_allocate err %d\n", err);
-		goto err_free_drvdata;
-	}
-
-	err = dpaa2_switch_init(sw_dev);
-	if (err)
-		goto err_free_cmdport;
-
-	/* DEFAULT_VLAN_ID is implicitly configured on the switch */
-	ethsw->vlans[DEFAULT_VLAN_ID] = ETHSW_VLAN_MEMBER;
-
-	/* Learning is implicitly enabled */
-	ethsw->learning = true;
-
-	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
-			       GFP_KERNEL);
-	if (!(ethsw->ports)) {
-		err = -ENOMEM;
-		goto err_takedown;
-	}
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		err = dpaa2_switch_probe_port(ethsw, i);
-		if (err)
-			goto err_free_ports;
-	}
-
-	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-		goto err_free_ports;
-	}
-
-	/* Make sure the switch ports are disabled at probe time */
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-		dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
-
-	/* Setup IRQs */
-	err = dpaa2_switch_setup_irqs(sw_dev);
-	if (err)
-		goto err_stop;
-
-	dev_info(dev, "probed %d port switch\n", ethsw->sw_attr.num_ifs);
-	return 0;
-
-err_stop:
-	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-
-err_free_ports:
-	/* Cleanup registered ports only */
-	for (i--; i >= 0; i--) {
-		unregister_netdev(ethsw->ports[i]->netdev);
-		free_netdev(ethsw->ports[i]->netdev);
-	}
-	kfree(ethsw->ports);
-
-err_takedown:
-	dpaa2_switch_takedown(sw_dev);
-
-err_free_cmdport:
-	fsl_mc_portal_free(ethsw->mc_io);
-
-err_free_drvdata:
-	kfree(ethsw);
-	dev_set_drvdata(dev, NULL);
-
-	return err;
-}
-
-static const struct fsl_mc_device_id dpaa2_switch_match_id_table[] = {
-	{
-		.vendor = FSL_MC_VENDOR_FREESCALE,
-		.obj_type = "dpsw",
-	},
-	{ .vendor = 0x0 }
-};
-MODULE_DEVICE_TABLE(fslmc, dpaa2_switch_match_id_table);
-
-static struct fsl_mc_driver dpaa2_switch_drv = {
-	.driver = {
-		.name = KBUILD_MODNAME,
-		.owner = THIS_MODULE,
-	},
-	.probe = dpaa2_switch_probe,
-	.remove = dpaa2_switch_remove,
-	.match_id_table = dpaa2_switch_match_id_table
-};
-
-module_fsl_mc_driver(dpaa2_switch_drv);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver");
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
deleted file mode 100644
index 5f9211ccb1ef..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * DPAA2 Ethernet Switch declarations
- *
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
- *
- */
-
-#ifndef __ETHSW_H
-#define __ETHSW_H
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/if_vlan.h>
-#include <uapi/linux/if_bridge.h>
-#include <net/switchdev.h>
-#include <linux/if_bridge.h>
-
-#include "dpsw.h"
-
-/* Number of IRQs supported */
-#define DPSW_IRQ_NUM	2
-
-/* Port is member of VLAN */
-#define ETHSW_VLAN_MEMBER	1
-/* VLAN to be treated as untagged on egress */
-#define ETHSW_VLAN_UNTAGGED	2
-/* Untagged frames will be assigned to this VLAN */
-#define ETHSW_VLAN_PVID		4
-/* VLAN configured on the switch */
-#define ETHSW_VLAN_GLOBAL	8
-
-/* Maximum Frame Length supported by HW (currently 10k) */
-#define DPAA2_MFL		(10 * 1024)
-#define ETHSW_MAX_FRAME_LENGTH	(DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
-#define ETHSW_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
-
-#define ETHSW_FEATURE_MAC_ADDR	BIT(0)
-
-extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
-
-struct ethsw_core;
-
-/* Per port private data */
-struct ethsw_port_priv {
-	struct net_device	*netdev;
-	u16			idx;
-	struct ethsw_core	*ethsw_data;
-	u8			link_state;
-	u8			stp_state;
-	bool			flood;
-
-	u8			vlans[VLAN_VID_MASK + 1];
-	u16			pvid;
-	struct net_device	*bridge_dev;
-};
-
-/* Switch data */
-struct ethsw_core {
-	struct device			*dev;
-	struct fsl_mc_io		*mc_io;
-	u16				dpsw_handle;
-	struct dpsw_attr		sw_attr;
-	u16				major, minor;
-	unsigned long			features;
-	int				dev_id;
-	struct ethsw_port_priv		**ports;
-
-	u8				vlans[VLAN_VID_MASK + 1];
-	bool				learning;
-
-	struct notifier_block		port_nb;
-	struct notifier_block		port_switchdev_nb;
-	struct notifier_block		port_switchdevb_nb;
-	struct workqueue_struct		*workqueue;
-};
-
-#endif	/* __ETHSW_H */
diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h
deleted file mode 100644
index 84f3aab54468..000000000000
--- a/include/linux/atm_suni.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by
-		driver-specific utilities) */
-
-/* Written 1998,2000 by Werner Almesberger, EPFL ICA */
-
-
-#ifndef LINUX_ATM_SUNI_H
-#define LINUX_ATM_SUNI_H
-
-/* everything obsoleted */
-
-#endif
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 40bad71865ea..47482049f640 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,6 +136,9 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
+	/* opcode 34 - 46 are reserved */
+	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
+	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
 };
 
 /* These macros are used to generate compilation errors if a structure/union
@@ -247,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
+#define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
 
 /* Define below the capability flags that are not offloads */
 #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
@@ -559,6 +563,11 @@ enum virtchnl_action {
 	/* action types */
 	VIRTCHNL_ACTION_DROP = 0,
 	VIRTCHNL_ACTION_TC_REDIRECT,
+	VIRTCHNL_ACTION_PASSTHRU,
+	VIRTCHNL_ACTION_QUEUE,
+	VIRTCHNL_ACTION_Q_REGION,
+	VIRTCHNL_ACTION_MARK,
+	VIRTCHNL_ACTION_COUNT,
 };
 
 enum virtchnl_flow_type {
@@ -668,6 +677,269 @@ enum virtchnl_vfr_states {
 	VIRTCHNL_VFR_VFACTIVE,
 };
 
+#define VIRTCHNL_MAX_NUM_PROTO_HDRS	32
+#define PROTO_HDR_SHIFT			5
+#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
+#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1)
+
+/* VF use these macros to configure each protocol header.
+ * Specify which protocol headers and protocol header fields base on
+ * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field.
+ * @param hdr: a struct of virtchnl_proto_hdr
+ * @param hdr_type: ETH/IPV4/TCP, etc
+ * @param field: SRC/DST/TEID/SPI, etc
+ */
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \
+	((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \
+	((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \
+	((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr)	((hdr)->field_selector)
+
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+	(VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \
+		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+	(VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \
+		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+
+#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \
+	((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type)
+#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \
+	(((hdr)->type) >> PROTO_HDR_SHIFT)
+#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \
+	((hdr)->type == ((val) >> PROTO_HDR_SHIFT))
+#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \
+	(VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \
+	 VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val)))
+
+/* Protocol header type within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization.
+ */
+enum virtchnl_proto_hdr_type {
+	VIRTCHNL_PROTO_HDR_NONE,
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_S_VLAN,
+	VIRTCHNL_PROTO_HDR_C_VLAN,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_GTPU_EH,
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+	VIRTCHNL_PROTO_HDR_PPPOE,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_PFCP,
+};
+
+/* Protocol header field within a protocol header. */
+enum virtchnl_proto_hdr_field {
+	/* ETHER */
+	VIRTCHNL_PROTO_HDR_ETH_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH),
+	VIRTCHNL_PROTO_HDR_ETH_DST,
+	VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE,
+	/* S-VLAN */
+	VIRTCHNL_PROTO_HDR_S_VLAN_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN),
+	/* C-VLAN */
+	VIRTCHNL_PROTO_HDR_C_VLAN_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN),
+	/* IPV4 */
+	VIRTCHNL_PROTO_HDR_IPV4_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4),
+	VIRTCHNL_PROTO_HDR_IPV4_DST,
+	VIRTCHNL_PROTO_HDR_IPV4_DSCP,
+	VIRTCHNL_PROTO_HDR_IPV4_TTL,
+	VIRTCHNL_PROTO_HDR_IPV4_PROT,
+	/* IPV6 */
+	VIRTCHNL_PROTO_HDR_IPV6_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6),
+	VIRTCHNL_PROTO_HDR_IPV6_DST,
+	VIRTCHNL_PROTO_HDR_IPV6_TC,
+	VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
+	VIRTCHNL_PROTO_HDR_IPV6_PROT,
+	/* TCP */
+	VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP),
+	VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
+	/* UDP */
+	VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP),
+	VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
+	/* SCTP */
+	VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP),
+	VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
+	/* GTPU_IP */
+	VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP),
+	/* GTPU_EH */
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH),
+	VIRTCHNL_PROTO_HDR_GTPU_EH_QFI,
+	/* PPPOE */
+	VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE),
+	/* L2TPV3 */
+	VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3),
+	/* ESP */
+	VIRTCHNL_PROTO_HDR_ESP_SPI =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP),
+	/* AH */
+	VIRTCHNL_PROTO_HDR_AH_SPI =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH),
+	/* PFCP */
+	VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP),
+	VIRTCHNL_PROTO_HDR_PFCP_SEID,
+};
+
+struct virtchnl_proto_hdr {
+	enum virtchnl_proto_hdr_type type;
+	u32 field_selector; /* a bit mask to select field for header type */
+	u8 buffer[64];
+	/**
+	 * binary buffer in network order for specific header type.
+	 * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4
+	 * header is expected to be copied into the buffer.
+	 */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
+
+struct virtchnl_proto_hdrs {
+	u8 tunnel_level;
+	/**
+	 * specify where protocol header start from.
+	 * 0 - from the outer layer
+	 * 1 - from the first inner layer
+	 * 2 - from the second inner layer
+	 * ....
+	 **/
+	int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
+	struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
+
+/* action configuration for FDIR */
+struct virtchnl_filter_action {
+	enum virtchnl_action type;
+	union {
+		/* used for queue and qgroup action */
+		struct {
+			u16 index;
+			u8 region;
+		} queue;
+		/* used for count action */
+		struct {
+			/* share counter ID with other flow rules */
+			u8 shared;
+			u32 id; /* counter ID */
+		} count;
+		/* used for mark action */
+		u32 mark_id;
+		u8 reserve[32];
+	} act_conf;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action);
+
+#define VIRTCHNL_MAX_NUM_ACTIONS  8
+
+struct virtchnl_filter_action_set {
+	/* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */
+	int count;
+	struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set);
+
+/* pattern and action for FDIR rule */
+struct virtchnl_fdir_rule {
+	struct virtchnl_proto_hdrs proto_hdrs;
+	struct virtchnl_filter_action_set action_set;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule);
+
+/* Status returned to VF after VF requests FDIR commands
+ * VIRTCHNL_FDIR_SUCCESS
+ * VF FDIR related request is successfully done by PF
+ * The request can be OP_ADD/DEL.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE
+ * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_EXIST
+ * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT
+ * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST
+ * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_INVALID
+ * OP_ADD_FDIR_FILTER request is failed due to parameters validation
+ * or HW doesn't support.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT
+ * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out
+ * for programming.
+ */
+enum virtchnl_fdir_prgm_status {
+	VIRTCHNL_FDIR_SUCCESS = 0,
+	VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE,
+	VIRTCHNL_FDIR_FAILURE_RULE_EXIST,
+	VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT,
+	VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST,
+	VIRTCHNL_FDIR_FAILURE_RULE_INVALID,
+	VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT,
+};
+
+/* VIRTCHNL_OP_ADD_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id,
+ * validate_only and rule_cfg. PF will return flow_id
+ * if the request is successfully done and return add_status to VF.
+ */
+struct virtchnl_fdir_add {
+	u16 vsi_id;  /* INPUT */
+	/*
+	 * 1 for validating a fdir rule, 0 for creating a fdir rule.
+	 * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER.
+	 */
+	u16 validate_only; /* INPUT */
+	u32 flow_id;       /* OUTPUT */
+	struct virtchnl_fdir_rule rule_cfg; /* INPUT */
+	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add);
+
+/* VIRTCHNL_OP_DEL_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id
+ * and flow_id. PF will return del_status to VF.
+ */
+struct virtchnl_fdir_del {
+	u16 vsi_id;  /* INPUT */
+	u16 pad;
+	u32 flow_id; /* INPUT */
+	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
+
 /**
  * virtchnl_vc_validate_vf_msg
  * @ver: Virtchnl version info
@@ -828,6 +1100,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
 		valid_len = sizeof(struct virtchnl_filter);
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER:
+		valid_len = sizeof(struct virtchnl_fdir_add);
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER:
+		valid_len = sizeof(struct virtchnl_fdir_del);
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3625f019767d..39dce9d3c3a5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,7 @@ struct bpf_local_storage;
 struct bpf_local_storage_map;
 struct kobject;
 struct mem_cgroup;
+struct bpf_func_state;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -118,6 +119,9 @@ struct bpf_map_ops {
 					   void *owner, u32 size);
 	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
 
+	/* Misc helpers.*/
+	int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+
 	/* map_meta_equal must be implemented for maps that can be
 	 * used as an inner map.  It is a runtime check to ensure
 	 * an inner map can be inserted to an outer map.
@@ -130,6 +134,13 @@ struct bpf_map_ops {
 	bool (*map_meta_equal)(const struct bpf_map *meta0,
 			       const struct bpf_map *meta1);
 
+
+	int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
+					      struct bpf_func_state *caller,
+					      struct bpf_func_state *callee);
+	int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
+				     void *callback_ctx, u64 flags);
+
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
@@ -296,6 +307,8 @@ enum bpf_arg_type {
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
+	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
+	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -412,6 +425,8 @@ enum bpf_reg_type {
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
+	PTR_TO_FUNC,		 /* reg points to a bpf program function */
+	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -507,6 +522,11 @@ enum bpf_cgroup_storage_type {
  */
 #define MAX_BPF_FUNC_ARGS 12
 
+/* The maximum number of arguments passed through registers
+ * a single function may have.
+ */
+#define MAX_BPF_FUNC_REG_ARGS 5
+
 struct btf_func_model {
 	u8 ret_size;
 	u8 nr_args;
@@ -1397,6 +1417,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
 int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
 				struct bpf_link_info *info);
 
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -1446,9 +1470,9 @@ struct btf *bpf_get_btf_vmlinux(void);
 /* Map specifics */
 struct xdp_buff;
 struct sk_buff;
+struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_flush(void);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1458,7 +1482,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1487,6 +1510,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 			     const union bpf_attr *kattr,
 			     union bpf_attr __user *uattr);
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+				const union bpf_attr *kattr,
+				union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -1516,6 +1542,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1585,17 +1612,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 	return -EOPNOTSUPP;
 }
 
-static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
-						       u32 key)
-{
-	return NULL;
-}
-
-static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map,
-							     u32 key)
-{
-	return NULL;
-}
 static inline bool dev_map_can_have_prog(struct bpf_map *map)
 {
 	return false;
@@ -1607,6 +1623,7 @@ static inline void __dev_flush(void)
 
 struct xdp_buff;
 struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
 static inline
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@ -1631,12 +1648,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 	return 0;
 }
 
-static inline
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	return NULL;
-}
-
 static inline void __cpu_map_flush(void)
 {
 }
@@ -1687,6 +1698,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+					      const union bpf_attr *kattr,
+					      union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
@@ -1701,6 +1719,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 {
 	return NULL;
 }
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1785,22 +1807,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 }
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
-#if defined(CONFIG_BPF_STREAM_PARSER)
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-			 struct bpf_prog *old, u32 which);
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
+
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags);
 #else
-static inline int sock_map_prog_update(struct bpf_map *map,
-				       struct bpf_prog *prog,
-				       struct bpf_prog *old, u32 which)
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
 {
-	return -EOPNOTSUPP;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
 static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 				       struct bpf_prog *prog)
 {
@@ -1818,20 +1842,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
 	return -EOPNOTSUPP;
 }
-#endif /* CONFIG_BPF_STREAM_PARSER */
 
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
-				       void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
-				       void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
-
-#ifdef CONFIG_BPF_SYSCALL
 static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
 						     void *key, void *value)
 {
@@ -1903,6 +1914,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 extern const struct bpf_func_proto bpf_sock_from_file_proto;
 extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
+extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index b2c9463f36a1..b902c580c48d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
 			 bool cacheit_lockit);
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter);
 
 int bpf_local_storage_map_check_btf(const struct bpf_map *map,
 				    const struct btf *btf,
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33ace398..479c101546ad 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	if (unlikely(!task->security))
-		return NULL;
-
-	return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	return NULL;
-}
-
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87..f883f01a5061 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
-#if defined(CONFIG_BPF_STREAM_PARSER)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
-#endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 #endif
 #ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 971b33aca13d..51c2ffa3d901 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -68,6 +68,8 @@ struct bpf_reg_state {
 			unsigned long raw1;
 			unsigned long raw2;
 		} raw;
+
+		u32 subprogno; /* for PTR_TO_FUNC */
 	};
 	/* For PTR_TO_PACKET, used to find other pointers with the same variable
 	 * offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
 	int acquired_refs;
 	struct bpf_reference_state *refs;
 	int allocated_stack;
+	bool in_callback_fn;
 	struct bpf_stack_state *stack;
 };
 
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 4265f328681a..c6bc45ae5e03 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -160,11 +160,6 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port)
 	packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0);
 }
 
-static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq)
-{
-	packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0);
-}
-
 static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class)
 {
 	packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2e5debc0373c..330345b1be54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -11,7 +11,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *
- *		Relocated to include/linux where it belongs by Alan Cox 
+ *		Relocated to include/linux where it belongs by Alan Cox
  *							<gw4pts@gw4pts.ampr.org>
  */
 #ifndef _LINUX_ETHERDEVICE_H
@@ -29,7 +29,7 @@ struct device;
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
+u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ec4cd3921c67..3583f7fc075c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -571,4 +571,13 @@ struct ethtool_phy_ops {
  */
 void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
 
+/**
+ * ethtool_sprintf - Write formatted string to ethtool string data
+ * @data: Pointer to start of string to update
+ * @fmt: Format of string to write
+ *
+ * Write formatted string to data. Update data to point at start of
+ * next string.
+ */
+extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3b00fc906ccd..b2b85b2cad8e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
-	struct bpf_map *map;
+	u32 map_id;
+	enum bpf_map_type map_type;
 	u32 kern_flags;
 	struct bpf_nh_params nh;
 };
@@ -1472,4 +1473,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+						  void *lookup_elem(struct bpf_map *map, u32 key))
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	/* Lower bits of the flags are used as return code on lookup failure */
+	if (unlikely(flags > XDP_TX))
+		return XDP_ABORTED;
+
+	ri->tgt_value = lookup_elem(map, ifindex);
+	if (unlikely(!ri->tgt_value)) {
+		/* If the lookup fails we want to clear out the state in the
+		 * redirect_info struct completely, so that if an eBPF program
+		 * performs multiple lookups, the last one always takes
+		 * precedence.
+		 */
+		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
+		ri->map_type = BPF_MAP_TYPE_UNSPEC;
+		return flags;
+	}
+
+	ri->tgt_index = ifindex;
+	ri->map_id = map->id;
+	ri->map_type = map->map_type;
+
+	return XDP_REDIRECT;
+}
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b979005ea39c..2cc35038a8ca 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb, struct netlink_ext_ack *extack);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
 					     struct list_head *br_ip_list)
@@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev)
 {
 	return false;
 }
+static inline int br_mdb_replay(struct net_device *br_dev,
+				struct net_device *dev,
+				struct notifier_block *nb,
+				struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
@@ -102,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
 int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+		   struct notifier_block *nb, struct netlink_ext_ack *extack);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
 {
@@ -128,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 {
 	return -EINVAL;
 }
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+				 struct net_device *dev,
+				 struct notifier_block *nb,
+				 struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE)
@@ -136,6 +155,10 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 				    __u16 vid);
 void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
+u8 br_port_get_stp_state(const struct net_device *dev);
+clock_t br_get_ageing_time(struct net_device *br_dev);
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -154,6 +177,23 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
 {
 	return false;
 }
+
+static inline u8 br_port_get_stp_state(const struct net_device *dev)
+{
+	return BR_STATE_DISABLED;
+}
+
+static inline clock_t br_get_ageing_time(struct net_device *br_dev)
+{
+	return 0;
+}
+
+static inline int br_fdb_replay(struct net_device *br_dev,
+				struct net_device *dev,
+				struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 9661416a9bb4..4efb537f57f3 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -6,50 +6,43 @@
 #define _LINUX_IF_RMNET_H_
 
 struct rmnet_map_header {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u8  pad_len:6;
-	u8  reserved_bit:1;
-	u8  cd_bit:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u8  cd_bit:1;
-	u8  reserved_bit:1;
-	u8  pad_len:6;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
-	u8  mux_id;
-	__be16 pkt_len;
+	u8 flags;			/* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */
+	u8 mux_id;
+	__be16 pkt_len;			/* Length of packet, including pad */
 }  __aligned(1);
 
+/* rmnet_map_header flags field:
+ *  PAD_LEN:	number of pad bytes following packet data
+ *  CMD:	1 = packet contains a MAP command; 0 = packet contains data
+ */
+#define MAP_PAD_LEN_MASK		GENMASK(5, 0)
+#define MAP_CMD_FLAG			BIT(7)
+
 struct rmnet_map_dl_csum_trailer {
-	u8  reserved1;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u8  valid:1;
-	u8  reserved2:7;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u8  reserved2:7;
-	u8  valid:1;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
-	u16 csum_start_offset;
-	u16 csum_length;
+	u8 reserved1;
+	u8 flags;			/* MAP_CSUM_DL_VALID_FLAG */
+	__be16 csum_start_offset;
+	__be16 csum_length;
 	__be16 csum_value;
 } __aligned(1);
 
+/* rmnet_map_dl_csum_trailer flags field:
+ *  VALID:	1 = checksum and length valid; 0 = ignore them
+ */
+#define MAP_CSUM_DL_VALID_FLAG		BIT(0)
+
 struct rmnet_map_ul_csum_header {
 	__be16 csum_start_offset;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u16 csum_insert_offset:14;
-	u16 udp_ind:1;
-	u16 csum_enabled:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u16 csum_enabled:1;
-	u16 udp_ind:1;
-	u16 csum_insert_offset:14;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
+	__be16 csum_info;		/* MAP_CSUM_UL_* */
 } __aligned(1);
 
+/* csum_info field:
+ *  OFFSET:	where (offset in bytes) to insert computed checksum
+ *  UDP:	1 = UDP checksum (zero checkum means no checksum)
+ *  ENABLED:	1 = checksum computation requested
+ */
+#define MAP_CSUM_UL_OFFSET_MASK		GENMASK(13, 0)
+#define MAP_CSUM_UL_UDP_FLAG		BIT(14)
+#define MAP_CSUM_UL_ENABLED_FLAG	BIT(15)
+
 #endif /* !(_LINUX_IF_RMNET_H_) */
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 52b1610eae68..274abd5fbac3 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -24,6 +24,7 @@
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 #define MARVELL_PHY_ID_88X3310		0x002b09a0
 #define MARVELL_PHY_ID_88E2110		0x002b09b0
+#define MARVELL_PHY_ID_88X2222		0x01410f10
 
 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
 #define MARVELL_PHY_ID_88E1111_FINISAR	0x01ff0cc0
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dc3d2508f5c6..92a029a800a0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1142,6 +1142,8 @@ enum mlx5_flex_parser_protos {
 	MLX5_FLEX_PROTO_GENEVE	      = 1 << 3,
 	MLX5_FLEX_PROTO_CW_MPLS_GRE   = 1 << 4,
 	MLX5_FLEX_PROTO_CW_MPLS_UDP   = 1 << 5,
+	MLX5_FLEX_PROTO_ICMP	      = 1 << 8,
+	MLX5_FLEX_PROTO_ICMPV6	      = 1 << 9,
 };
 
 /* MLX5 DEV CAPs */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 53b89631a1d9..23bb01d7c9b9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -644,10 +644,14 @@ struct mlx5_td {
 };
 
 struct mlx5e_resources {
-	u32                        pdn;
-	struct mlx5_td             td;
-	struct mlx5_core_mkey      mkey;
-	struct mlx5_sq_bfreg       bfreg;
+	struct mlx5e_hw_objs {
+		u32                        pdn;
+		struct mlx5_td             td;
+		struct mlx5_core_mkey      mkey;
+		struct mlx5_sq_bfreg       bfreg;
+	} hw_objs;
+	struct devlink_port dl_port;
+	struct net_device *uplink_netdev;
 };
 
 enum mlx5_sw_icm_type {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 87a5d186faff..f57b70fc251f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -756,6 +756,13 @@ struct rx_queue_attribute {
 			 const char *buf, size_t len);
 };
 
+/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
+enum xps_map_type {
+	XPS_CPUS = 0,
+	XPS_RXQS,
+	XPS_MAPS_MAX,
+};
+
 #ifdef CONFIG_XPS
 /*
  * This structure holds an XPS map which can be of variable length.  The
@@ -773,9 +780,19 @@ struct xps_map {
 
 /*
  * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ *
+ * We keep track of the number of cpus/rxqs used when the struct is allocated,
+ * in nr_ids. This will help not accessing out-of-bound memory.
+ *
+ * We keep track of the number of traffic classes used when the struct is
+ * allocated, in num_tc. This will be used to navigate the maps, to ensure we're
+ * not crossing its upper bound, as the original dev->num_tc can be updated in
+ * the meantime.
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
+	unsigned int nr_ids;
+	s16 num_tc;
 	struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */
 };
 
@@ -833,6 +850,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct net_device *sb_dev);
 
+enum net_device_path_type {
+	DEV_PATH_ETHERNET = 0,
+	DEV_PATH_VLAN,
+	DEV_PATH_BRIDGE,
+	DEV_PATH_PPPOE,
+	DEV_PATH_DSA,
+};
+
+struct net_device_path {
+	enum net_device_path_type	type;
+	const struct net_device		*dev;
+	union {
+		struct {
+			u16		id;
+			__be16		proto;
+			u8		h_dest[ETH_ALEN];
+		} encap;
+		struct {
+			enum {
+				DEV_PATH_BR_VLAN_KEEP,
+				DEV_PATH_BR_VLAN_TAG,
+				DEV_PATH_BR_VLAN_UNTAG,
+				DEV_PATH_BR_VLAN_UNTAG_HW,
+			}		vlan_mode;
+			u16		vlan_id;
+			__be16		vlan_proto;
+		} bridge;
+		struct {
+			int port;
+			u16 proto;
+		} dsa;
+	};
+};
+
+#define NET_DEVICE_PATH_STACK_MAX	5
+#define NET_DEVICE_PATH_VLAN_MAX	2
+
+struct net_device_path_stack {
+	int			num_paths;
+	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
+};
+
+struct net_device_path_ctx {
+	const struct net_device *dev;
+	const u8		*daddr;
+
+	int			num_vlans;
+	struct {
+		u16		id;
+		__be16		proto;
+	} vlan[NET_DEVICE_PATH_VLAN_MAX];
+};
+
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
 	TC_SETUP_CLSU32,
@@ -1267,6 +1337,8 @@ struct netdev_net_notifier {
  * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
  *	If a device is paired with a peer device, return the peer instance.
  *	The caller must be under RCU read context.
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+ *     Get the forwarding path to reach the real device from the HW destination address
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1473,6 +1545,8 @@ struct net_device_ops {
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
 	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
+	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+                                                         struct net_device_path *path);
 };
 
 /**
@@ -1520,6 +1594,8 @@ struct net_device_ops {
  * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
  * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+ * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
+ *	skb_headlen(skb) == 0 (data starts from frag0)
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1553,6 +1629,7 @@ enum netdev_priv_flags {
 	IFF_FAILOVER_SLAVE		= 1<<28,
 	IFF_L3MDEV_RX_HANDLER		= 1<<29,
 	IFF_LIVE_RENAME_OK		= 1<<30,
+	IFF_TX_SKB_NO_LINEAR		= 1<<31,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1579,12 +1656,14 @@ enum netdev_priv_flags {
 #define IFF_L3MDEV_SLAVE		IFF_L3MDEV_SLAVE
 #define IFF_TEAM			IFF_TEAM
 #define IFF_RXFH_CONFIGURED		IFF_RXFH_CONFIGURED
+#define IFF_PHONY_HEADROOM		IFF_PHONY_HEADROOM
 #define IFF_MACSEC			IFF_MACSEC
 #define IFF_NO_RX_HANDLER		IFF_NO_RX_HANDLER
 #define IFF_FAILOVER			IFF_FAILOVER
 #define IFF_FAILOVER_SLAVE		IFF_FAILOVER_SLAVE
 #define IFF_L3MDEV_RX_HANDLER		IFF_L3MDEV_RX_HANDLER
 #define IFF_LIVE_RENAME_OK		IFF_LIVE_RENAME_OK
+#define IFF_TX_SKB_NO_LINEAR		IFF_TX_SKB_NO_LINEAR
 
 /* Specifies the type of the struct net_device::ml_priv pointer */
 enum netdev_ml_priv_type {
@@ -1760,8 +1839,7 @@ enum netdev_ml_priv_type {
  *	@tx_queue_len:		Max frames per queue allowed
  *	@tx_global_lock: 	XXX: need comments on this one
  *	@xdp_bulkq:		XDP device bulk queue
- *	@xps_cpus_map:		all CPUs map for XPS device
- *	@xps_rxqs_map:		all RXQs map for XPS device
+ *	@xps_maps:		all CPUs/RXQs maps for XPS device
  *
  *	@xps_maps:	XXX: need comments on this one
  *	@miniq_egress:		clsact qdisc specific data for
@@ -1773,6 +1851,7 @@ enum netdev_ml_priv_type {
  *
  *	@proto_down_reason:	reason a netdev interface is held down
  *	@pcpu_refcnt:		Number of references to this device
+ *	@dev_refcnt:		Number of references to this device
  *	@todo_list:		Delayed register/unregister
  *	@link_watch_list:	XXX: need comments on this one
  *
@@ -2057,8 +2136,7 @@ struct net_device {
 	struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_cpus_map;
-	struct xps_dev_maps __rcu *xps_rxqs_map;
+	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -2074,7 +2152,12 @@ struct net_device {
 	u32                     proto_down_reason;
 
 	struct list_head	todo_list;
+
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	int __percpu		*pcpu_refcnt;
+#else
+	refcount_t		dev_refcnt;
+#endif
 
 	struct list_head	link_watch_list;
 
@@ -2846,6 +2929,8 @@ void dev_remove_offload(struct packet_offload *po);
 
 int dev_get_iflink(const struct net_device *dev);
 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack);
 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
@@ -3424,6 +3509,24 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
 }
 
 /**
+ *	netdev_queue_set_dql_min_limit - set dql minimum limit
+ *	@dev_queue: pointer to transmit queue
+ *	@min_limit: dql minimum limit
+ *
+ * Forces xmit_more() to return true until the minimum threshold
+ * defined by @min_limit is reached (or until the tx queue is
+ * empty). Warning: to be use with care, misuse will impact the
+ * latency.
+ */
+static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue,
+						  unsigned int min_limit)
+{
+#ifdef CONFIG_BQL
+	dev_queue->dql.min_limit = min_limit;
+#endif
+}
+
+/**
  *	netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
  *	@dev_queue: pointer to transmit queue
  *
@@ -3688,7 +3791,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map);
+			  u16 index, enum xps_map_type type);
 
 /**
  *	netif_attr_test_mask - Test a CPU or Rx queue set in a mask
@@ -3783,7 +3886,7 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 
 static inline int __netif_set_xps_queue(struct net_device *dev,
 					const unsigned long *mask,
-					u16 index, bool is_rxqs_map)
+					u16 index, enum xps_map_type type)
 {
 	return 0;
 }
@@ -4026,7 +4129,11 @@ void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	this_cpu_dec(*dev->pcpu_refcnt);
+#else
+	refcount_dec(&dev->dev_refcnt);
+#endif
 }
 
 /**
@@ -4037,7 +4144,11 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	this_cpu_inc(*dev->pcpu_refcnt);
+#else
+	refcount_inc(&dev->dev_refcnt);
+#endif
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
@@ -4172,7 +4283,7 @@ static inline bool netif_oper_up(const struct net_device *dev)
  *
  * Check if device has not been removed from system.
  */
-static inline bool netif_device_present(struct net_device *dev)
+static inline bool netif_device_present(const struct net_device *dev)
 {
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
@@ -4611,6 +4722,7 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
+extern int		netdev_unregister_timeout_secs;
 extern int		weight_p;
 extern int		dev_weight_rx_bias;
 extern int		dev_weight_tx_bias;
@@ -5287,6 +5399,9 @@ do {								\
 #define PTYPE_HASH_SIZE	(16)
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
+extern struct list_head ptype_all __read_mostly;
+extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
 extern struct net_device *blackhole_netdev;
 
 #endif	/* _LINUX_NETDEVICE_H */
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 351c1c9aedc5..2cb5188a7ef1 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -10,10 +10,15 @@
 #include <linux/phy.h>
 #include <linux/phylink.h>
 
+/* AN mode */
+#define DW_AN_C73			1
+#define DW_AN_C37_SGMII			2
+
 struct mdio_xpcs_args {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 	struct mii_bus *bus;
 	int addr;
+	int an_mode;
 };
 
 struct mdio_xpcs_ops {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1a12e4436b5b..8e2cf84b2318 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1532,6 +1532,7 @@ int genphy_c45_read_mdix(struct phy_device *phydev);
 int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 int genphy_c45_read_status(struct phy_device *phydev);
 int genphy_c45_config_aneg(struct phy_device *phydev);
+int genphy_c45_loopback(struct phy_device *phydev, bool enable);
 
 /* Generic C45 PHY driver */
 extern struct phy_driver genphy_c45_driver;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index d81a714cfbbd..fd2acfd9b597 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -64,6 +64,7 @@ enum phylink_op_type {
  * @pcs_poll: MAC PCS cannot provide link change interrupt
  * @poll_fixed_state: if true, starts link_poll,
  *		      if MAC link is at %MLO_AN_FIXED mode.
+ * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
  * @get_fixed_state: callback to execute to determine the fixed link state,
  *		     if MAC link is at %MLO_AN_FIXED mode.
  */
@@ -72,6 +73,7 @@ struct phylink_config {
 	enum phylink_op_type type;
 	bool pcs_poll;
 	bool poll_fixed_state;
+	bool ovr_an_inband;
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
 };
diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h
index 388846766bb2..6a000df5541f 100644
--- a/include/linux/platform_data/hirschmann-hellcreek.h
+++ b/include/linux/platform_data/hirschmann-hellcreek.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 
 struct hellcreek_platform_data {
+	const char *name;	/* Switch name */
 	int num_ports;		/* Amount of switch ports */
 	int is_100_mbits;	/* Is it configured to 100 or 1000 mbit/s */
 	int qbv_support;	/* Qbv support on front TSN ports */
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 98966064ee68..91f9a928344e 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
 	int	(*start_xmit)(struct ppp_channel *, struct sk_buff *);
 	/* Handle an ioctl call that has come in via /dev/ppp. */
 	int	(*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
+	int	(*fill_forward_path)(struct net_device_path_ctx *,
+				     struct net_device_path *,
+				     const struct ppp_channel *);
 };
 
 struct ppp_channel {
diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h
new file mode 100644
index 000000000000..51818198c292
--- /dev/null
+++ b/include/linux/ptp_pch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PTP PCH
+ *
+ * Copyright 2019 Linaro Ltd.
+ *
+ * Author Lee Jones <lee.jones@linaro.org>
+ */
+
+#ifndef _PTP_PCH_H_
+#define _PTP_PCH_H_
+
+void pch_ch_control_write(struct pci_dev *pdev, u32 val);
+u32  pch_ch_event_read(struct pci_dev *pdev);
+void pch_ch_event_write(struct pci_dev *pdev, u32 val);
+u32  pch_src_uuid_lo_read(struct pci_dev *pdev);
+u32  pch_src_uuid_hi_read(struct pci_dev *pdev);
+u64  pch_rx_snap_read(struct pci_dev *pdev);
+u64  pch_tx_snap_read(struct pci_dev *pdev);
+int  pch_set_station_address(u8 *addr, struct pci_dev *pdev);
+
+#endif /* _PTP_PCH_H_ */
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index e339b48de32d..f34dbd0db795 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -19,7 +19,7 @@ enum qed_chain_mode {
 	/* Each Page contains a next pointer at its end */
 	QED_CHAIN_MODE_NEXT_PTR,
 
-	/* Chain is a single page (next ptr) is unrequired */
+	/* Chain is a single page (next ptr) is not required */
 	QED_CHAIN_MODE_SINGLE,
 
 	/* Page pointers are located in a side list */
@@ -56,13 +56,13 @@ struct qed_chain_pbl_u32 {
 };
 
 struct qed_chain_u16 {
-	/* Cyclic index of next element to produce/consme */
+	/* Cyclic index of next element to produce/consume */
 	u16						prod_idx;
 	u16						cons_idx;
 };
 
 struct qed_chain_u32 {
-	/* Cyclic index of next element to produce/consme */
+	/* Cyclic index of next element to produce/consume */
 	u32						prod_idx;
 	u32						cons_idx;
 };
@@ -270,7 +270,7 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
 /**
  * @brief qed_chain_advance_page -
  *
- * Advance the next element accros pages for a linked chain
+ * Advance the next element across pages for a linked chain
  *
  * @param p_chain
  * @param p_next_elem
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..e5b7d9054473 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
+struct bpf_local_storage;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1351,6 +1352,10 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	/* Used by BPF task local storage */
+	struct bpf_local_storage __rcu	*bpf_storage;
+#endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2c9ee71cb2c..c8def85fcc22 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -657,6 +657,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@protocol: Packet protocol from driver
  *	@destructor: Destruct function
  *	@tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
+ *	@_sk_redir: socket redirection information for skmsg
  *	@_nfct: Associated connection, if any (with nfctinfo bits)
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
@@ -756,6 +757,9 @@ struct sk_buff {
 			void		(*destructor)(struct sk_buff *skb);
 		};
 		struct list_head	tcp_tsorted_anchor;
+#ifdef CONFIG_NET_SOCK_MSG
+		unsigned long		_sk_redir;
+#endif
 	};
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -1137,7 +1141,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 
 	return skb->fclone == SKB_FCLONE_ORIG &&
 	       refcount_read(&fclones->fclone_ref) > 1 &&
-	       fclones->skb2.sk == sk;
+	       READ_ONCE(fclones->skb2.sk) == sk;
 }
 
 /**
@@ -1289,10 +1293,10 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 void __skb_get_hash(struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
 		   const struct flow_keys_basic *keys, int hlen);
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen_proto);
+			    const void *data, int hlen_proto);
 
 static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
 					int thoff, u8 ip_proto)
@@ -1311,9 +1315,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 bool __skb_flow_dissect(const struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container,
-			void *data, __be16 proto, int nhoff, int hlen,
-			unsigned int flags);
+			void *target_container, const void *data,
+			__be16 proto, int nhoff, int hlen, unsigned int flags);
 
 static inline bool skb_flow_dissect(const struct sk_buff *skb,
 				    struct flow_dissector *flow_dissector,
@@ -1335,9 +1338,9 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
 static inline bool
 skb_flow_dissect_flow_keys_basic(const struct net *net,
 				 const struct sk_buff *skb,
-				 struct flow_keys_basic *flow, void *data,
-				 __be16 proto, int nhoff, int hlen,
-				 unsigned int flags)
+				 struct flow_keys_basic *flow,
+				 const void *data, __be16 proto,
+				 int nhoff, int hlen, unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
 	return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
@@ -3675,14 +3678,13 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
 static inline void * __must_check
-__skb_header_pointer(const struct sk_buff *skb, int offset,
-		     int len, void *data, int hlen, void *buffer)
+__skb_header_pointer(const struct sk_buff *skb, int offset, int len,
+		     const void *data, int hlen, void *buffer)
 {
-	if (hlen - offset >= len)
-		return data + offset;
+	if (likely(hlen - offset >= len))
+		return (void *)data + offset;
 
-	if (!skb ||
-	    skb_copy_bits(skb, offset, buffer, len) < 0)
+	if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
 		return NULL;
 
 	return buffer;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 8edbbf5f2f93..6c09d94be2e9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -56,8 +56,8 @@ struct sk_msg {
 
 struct sk_psock_progs {
 	struct bpf_prog			*msg_parser;
-	struct bpf_prog			*skb_parser;
-	struct bpf_prog			*skb_verdict;
+	struct bpf_prog			*stream_parser;
+	struct bpf_prog			*stream_verdict;
 };
 
 enum sk_psock_state_bits {
@@ -70,12 +70,6 @@ struct sk_psock_link {
 	void				*link_raw;
 };
 
-struct sk_psock_parser {
-	struct strparser		strp;
-	bool				enabled;
-	void (*saved_data_ready)(struct sock *sk);
-};
-
 struct sk_psock_work_state {
 	struct sk_buff			*skb;
 	u32				len;
@@ -90,7 +84,9 @@ struct sk_psock {
 	u32				eval;
 	struct sk_msg			*cork;
 	struct sk_psock_progs		progs;
-	struct sk_psock_parser		parser;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strparser		strp;
+#endif
 	struct sk_buff_head		ingress_skb;
 	struct list_head		ingress_msg;
 	unsigned long			state;
@@ -100,6 +96,7 @@ struct sk_psock {
 	void (*saved_unhash)(struct sock *sk);
 	void (*saved_close)(struct sock *sk, long timeout);
 	void (*saved_write_space)(struct sock *sk);
+	void (*saved_data_ready)(struct sock *sk);
 	struct proto			*sk_proto;
 	struct sk_psock_work_state	work_state;
 	struct work_struct		work;
@@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
 
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+#else
+static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+
+static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+#endif
+
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 
@@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
 
 struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
 
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
-
 static inline void sk_psock_cork_free(struct sk_psock *psock)
 {
 	if (psock->cork) {
@@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
 	return psock;
 }
 
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
 
 static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
 
 static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
 {
-	if (psock->parser.enabled)
-		psock->parser.saved_data_ready(sk);
+	if (psock->saved_data_ready)
+		psock->saved_data_ready(sk);
 	else
 		sk->sk_data_ready(sk);
 }
@@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
 static inline void psock_progs_drop(struct sk_psock_progs *progs)
 {
 	psock_set_prog(&progs->msg_parser, NULL);
-	psock_set_prog(&progs->skb_parser, NULL);
-	psock_set_prog(&progs->skb_verdict, NULL);
+	psock_set_prog(&progs->stream_parser, NULL);
+	psock_set_prog(&progs->stream_verdict, NULL);
 }
 
 int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
@@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 {
 	if (!psock)
 		return false;
-	return psock->parser.enabled;
+	return !!psock->saved_data_ready;
+}
+
+#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
+
+/* We only have one bit so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+
+static inline bool skb_bpf_ingress(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return sk_redir & BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_ingress(struct sk_buff *skb)
+{
+	skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
+				     bool ingress)
+{
+	skb->_sk_redir = (unsigned long)sk_redir;
+	if (ingress)
+		skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
+}
+
+static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	skb->_sk_redir = 0;
 }
+#endif /* CONFIG_NET_SOCK_MSG */
 #endif /* _LINUX_SKMSG_H */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a302982de2d7..febdb43d27e5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -81,6 +81,7 @@
 struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
 	unsigned int has_xpcs;
+	unsigned int xpcs_an_inband;
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
@@ -143,6 +144,32 @@ struct stmmac_txq_cfg {
 	int tbs_en;
 };
 
+/* FPE link state */
+enum stmmac_fpe_state {
+	FPE_STATE_OFF = 0,
+	FPE_STATE_CAPABLE = 1,
+	FPE_STATE_ENTERING_ON = 2,
+	FPE_STATE_ON = 3,
+};
+
+/* FPE link-partner hand-shaking mPacket type */
+enum stmmac_mpacket_type {
+	MPACKET_VERIFY = 0,
+	MPACKET_RESPONSE = 1,
+};
+
+enum stmmac_fpe_task_state_t {
+	__FPE_REMOVING,
+	__FPE_TASK_SCHED,
+};
+
+struct stmmac_fpe_cfg {
+	bool enable;				/* FPE enable */
+	bool hs_enable;				/* FPE handshake enable */
+	enum stmmac_fpe_state lp_fpe_state;	/* Link Partner FPE state */
+	enum stmmac_fpe_state lo_fpe_state;	/* Local station FPE state */
+};
+
 struct plat_stmmacenet_data {
 	int bus_id;
 	int phy_addr;
@@ -154,6 +181,7 @@ struct plat_stmmacenet_data {
 	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_est *est;
+	struct stmmac_fpe_cfg *fpe_cfg;
 	int clk_csr;
 	int has_gmac;
 	int enh_desc;
@@ -180,9 +208,13 @@ struct plat_stmmacenet_data {
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
+	void (*ptp_clk_freq_config)(void *priv);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	struct mac_device_info *(*setup)(void *priv);
+	int (*clks_config)(void *priv, bool enabled);
+	int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
+			   void *ctx);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
@@ -203,5 +235,7 @@ struct plat_stmmacenet_data {
 	u8 vlan_fail_q;
 	unsigned int eee_usecs_rate;
 	struct pci_dev *pdev;
+	bool has_crossts;
+	int int_snapshot_num;
 };
 #endif
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 83a933e563fe..57b2c49f72f4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -49,10 +49,12 @@ struct phylink_link_state;
 #define DSA_TAG_PROTO_XRS700X_VALUE		19
 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE	20
 #define DSA_TAG_PROTO_SEVILLE_VALUE		21
+#define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
 	DSA_TAG_PROTO_BRCM		= DSA_TAG_PROTO_BRCM_VALUE,
+	DSA_TAG_PROTO_BRCM_LEGACY	= DSA_TAG_PROTO_BRCM_LEGACY_VALUE,
 	DSA_TAG_PROTO_BRCM_PREPEND	= DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
 	DSA_TAG_PROTO_DSA		= DSA_TAG_PROTO_DSA_VALUE,
 	DSA_TAG_PROTO_EDSA		= DSA_TAG_PROTO_EDSA_VALUE,
@@ -491,6 +493,20 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
 		return dp->vlan_filtering;
 }
 
+static inline
+struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
+{
+	if (!dp->bridge_dev)
+		return NULL;
+
+	if (dp->lag_dev)
+		return dp->lag_dev;
+	else if (dp->hsr_dev)
+		return dp->hsr_dev;
+
+	return dp->slave;
+}
+
 typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
 			      bool is_static, void *data);
 struct dsa_switch_ops {
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index cc10b10dc3a1..ffd386ea0dbb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -350,7 +350,7 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 u32 flow_hash_from_keys(struct flow_keys *keys);
 void skb_flow_get_icmp_tci(const struct sk_buff *skb,
 			   struct flow_dissector_key_icmp *key_icmp,
-			   void *data, int thoff, int hlen);
+			   const void *data, int thoff, int hlen);
 
 static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
 				      enum flow_dissector_key_id key_id)
@@ -368,8 +368,8 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
 struct bpf_flow_dissector {
 	struct bpf_flow_keys	*flow_keys;
 	const struct sk_buff	*skb;
-	void			*data;
-	void			*data_end;
+	const void		*data;
+	const void		*data_end;
 };
 
 static inline void
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e6bd8ebf9ac3..dc5c1e69cd9f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -147,6 +147,7 @@ enum flow_action_id {
 	FLOW_ACTION_MPLS_POP,
 	FLOW_ACTION_MPLS_MANGLE,
 	FLOW_ACTION_GATE,
+	FLOW_ACTION_PPPOE_PUSH,
 	NUM_FLOW_ACTIONS,
 };
 
@@ -234,6 +235,8 @@ struct flow_action_entry {
 			u32			index;
 			u32			burst;
 			u64			rate_bytes_ps;
+			u64			burst_pkt;
+			u64			rate_pkt_ps;
 			u32			mtu;
 		} police;
 		struct {				/* FLOW_ACTION_CT */
@@ -272,6 +275,9 @@ struct flow_action_entry {
 			u32		num_entries;
 			struct action_gate_entry *entries;
 		} gate;
+		struct {				/* FLOW_ACTION_PPPOE_PUSH */
+			u16		sid;
+		} pppoe;
 	};
 	struct flow_action_cookie *cookie; /* user defined action cookie */
 };
diff --git a/include/net/gro.h b/include/net/gro.h
index 8a6eb5303cc4..01edaf3fdda0 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -3,10 +3,23 @@
 #ifndef _NET_IPV6_GRO_H
 #define _NET_IPV6_GRO_H
 
+#include <linux/indirect_call_wrapper.h>
+
+struct list_head;
+struct sk_buff;
+
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
 							   struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
 							   struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+
+#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
+({								\
+	unlikely(gro_recursion_inc_test(skb)) ?			\
+		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
+		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
+})
+
 #endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index bd1f396cc9c7..448bf2b34759 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -30,6 +30,7 @@
  */
 
 #define NEXTHDR_HOP		0	/* Hop-by-hop option header. */
+#define NEXTHDR_IPV4		4	/* IPv4 in IPv6 */
 #define NEXTHDR_TCP		6	/* TCP segment. */
 #define NEXTHDR_UDP		17	/* UDP message. */
 #define NEXTHDR_IPV6		41	/* IPv6 in IPv6 */
diff --git a/include/net/lapb.h b/include/net/lapb.h
index eee73442a1ba..124ee122f2c8 100644
--- a/include/net/lapb.h
+++ b/include/net/lapb.h
@@ -92,7 +92,7 @@ struct lapb_cb {
 	unsigned short		n2, n2count;
 	unsigned short		t1, t2;
 	struct timer_list	t1timer, t2timer;
-	bool			t1timer_stop, t2timer_stop;
+	bool			t1timer_running, t2timer_running;
 
 	/* Internal control information */
 	struct sk_buff_head	write_queue;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 5694370be3d4..cea69c801595 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -34,6 +34,13 @@ struct mptcp_ext {
 	/* one byte hole */
 };
 
+#define MPTCP_RM_IDS_MAX	8
+
+struct mptcp_rm_list {
+	u8 ids[MPTCP_RM_IDS_MAX];
+	u8 nr;
+};
+
 struct mptcp_out_options {
 #if IS_ENABLED(CONFIG_MPTCP)
 	u16 suboptions;
@@ -48,7 +55,7 @@ struct mptcp_out_options {
 	u8 addr_id;
 	u16 port;
 	u64 ahmac;
-	u8 rm_id;
+	struct mptcp_rm_list rm_list;
 	u8 join_id;
 	u8 backup;
 	u32 nonce;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 7b3c873f8839..e95483192d1b 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
 
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
 #endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 54c4d5c908a5..4d991c1e93ef 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -86,8 +86,16 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
 enum flow_offload_tuple_dir {
 	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
 	FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
-	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
 };
+#define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
+
+enum flow_offload_xmit_type {
+	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
+	FLOW_OFFLOAD_XMIT_XFRM,
+	FLOW_OFFLOAD_XMIT_DIRECT,
+};
+
+#define NF_FLOW_TABLE_ENCAP_MAX		2
 
 struct flow_offload_tuple {
 	union {
@@ -107,15 +115,28 @@ struct flow_offload_tuple {
 
 	u8				l3proto;
 	u8				l4proto;
+	struct {
+		u16			id;
+		__be16			proto;
+	} encap[NF_FLOW_TABLE_ENCAP_MAX];
 
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
-	u8				dir;
-
+	u8				dir:2,
+					xmit_type:2,
+					encap_num:2,
+					in_vlan_ingress:2;
 	u16				mtu;
-
-	struct dst_entry		*dst_cache;
+	union {
+		struct dst_entry	*dst_cache;
+		struct {
+			u32		ifidx;
+			u32		hw_ifidx;
+			u8		h_source[ETH_ALEN];
+			u8		h_dest[ETH_ALEN];
+		} out;
+	};
 };
 
 struct flow_offload_tuple_rhash {
@@ -158,7 +179,23 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 
 struct nf_flow_route {
 	struct {
-		struct dst_entry	*dst;
+		struct dst_entry		*dst;
+		struct {
+			u32			ifindex;
+			struct {
+				u16		id;
+				__be16		proto;
+			} encap[NF_FLOW_TABLE_ENCAP_MAX];
+			u8			num_encaps:2,
+						ingress_vlans:2;
+		} in;
+		struct {
+			u32			ifindex;
+			u32			hw_ifindex;
+			u8			h_source[ETH_ALEN];
+			u8			h_dest[ETH_ALEN];
+		} out;
+		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
 
@@ -229,12 +266,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table);
 
 void flow_offload_teardown(struct flow_offload *flow);
 
-int nf_flow_snat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_snat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir);
 
 struct flow_ports {
 	__be16 source, dest;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5aaced6bf13e..0cef5ad9768a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1498,13 +1498,16 @@ struct nft_trans_chain {
 
 struct nft_trans_table {
 	bool				update;
-	bool				enable;
+	u8				state;
+	u32				flags;
 };
 
 #define nft_trans_table_update(trans)	\
 	(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans)	\
-	(((struct nft_trans_table *)trans->data)->enable)
+#define nft_trans_table_state(trans)	\
+	(((struct nft_trans_table *)trans->data)->state)
+#define nft_trans_table_flags(trans)	\
+	(((struct nft_trans_table *)trans->data)->flags)
 
 struct nft_trans_elem {
 	struct nft_set			*set;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 70a2a085dd1a..9e3cb2722b80 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -11,7 +11,6 @@
 #include <linux/rcupdate.h>
 #include <linux/siphash.h>
 
-struct tcpm_hash_bucket;
 struct ctl_table_header;
 struct ipv4_devconf;
 struct fib_rules_ops;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index a10a319d7eb2..28145f714801 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -40,6 +40,12 @@ struct nh_config {
 
 	struct nlattr	*nh_grp;
 	u16		nh_grp_type;
+	u16		nh_grp_res_num_buckets;
+	unsigned long	nh_grp_res_idle_timer;
+	unsigned long	nh_grp_res_unbalanced_timer;
+	bool		nh_grp_res_has_num_buckets;
+	bool		nh_grp_res_has_idle_timer;
+	bool		nh_grp_res_has_unbalanced_timer;
 
 	struct nlattr	*nh_encap;
 	u16		nh_encap_type;
@@ -63,6 +69,32 @@ struct nh_info {
 	};
 };
 
+struct nh_res_bucket {
+	struct nh_grp_entry __rcu *nh_entry;
+	atomic_long_t		used_time;
+	unsigned long		migrated_time;
+	bool			occupied;
+	u8			nh_flags;
+};
+
+struct nh_res_table {
+	struct net		*net;
+	u32			nhg_id;
+	struct delayed_work	upkeep_dw;
+
+	/* List of NHGEs that have too few buckets ("uw" for underweight).
+	 * Reclaimed buckets will be given to entries in this list.
+	 */
+	struct list_head	uw_nh_entries;
+	unsigned long		unbalanced_since;
+
+	u32			idle_timer;
+	u32			unbalanced_timer;
+
+	u16			num_nh_buckets;
+	struct nh_res_bucket	nh_buckets[];
+};
+
 struct nh_grp_entry {
 	struct nexthop	*nh;
 	u8		weight;
@@ -71,6 +103,13 @@ struct nh_grp_entry {
 		struct {
 			atomic_t	upper_bound;
 		} mpath;
+		struct {
+			/* Member on uw_nh_entries. */
+			struct list_head	uw_nh_entry;
+
+			u16			count_buckets;
+			u16			wants_buckets;
+		} res;
 	};
 
 	struct list_head nh_list;
@@ -80,9 +119,13 @@ struct nh_grp_entry {
 struct nh_group {
 	struct nh_group		*spare; /* spare group for removals */
 	u16			num_nh;
+	bool			is_multipath;
 	bool			mpath;
+	bool			resilient;
 	bool			fdb_nh;
 	bool			has_v4;
+
+	struct nh_res_table __rcu *res_table;
 	struct nh_grp_entry	nh_entries[];
 };
 
@@ -112,11 +155,15 @@ struct nexthop {
 enum nexthop_event_type {
 	NEXTHOP_EVENT_DEL,
 	NEXTHOP_EVENT_REPLACE,
+	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+	NEXTHOP_EVENT_BUCKET_REPLACE,
 };
 
 enum nh_notifier_info_type {
 	NH_NOTIFIER_INFO_TYPE_SINGLE,
 	NH_NOTIFIER_INFO_TYPE_GRP,
+	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
+	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
 };
 
 struct nh_notifier_single_info {
@@ -143,6 +190,19 @@ struct nh_notifier_grp_info {
 	struct nh_notifier_grp_entry_info nh_entries[];
 };
 
+struct nh_notifier_res_bucket_info {
+	u16 bucket_index;
+	unsigned int idle_timer_ms;
+	bool force;
+	struct nh_notifier_single_info old_nh;
+	struct nh_notifier_single_info new_nh;
+};
+
+struct nh_notifier_res_table_info {
+	u16 num_nh_buckets;
+	struct nh_notifier_single_info nhs[];
+};
+
 struct nh_notifier_info {
 	struct net *net;
 	struct netlink_ext_ack *extack;
@@ -151,6 +211,8 @@ struct nh_notifier_info {
 	union {
 		struct nh_notifier_single_info *nh;
 		struct nh_notifier_grp_info *nh_grp;
+		struct nh_notifier_res_table_info *nh_res_table;
+		struct nh_notifier_res_bucket_info *nh_res_bucket;
 	};
 };
 
@@ -158,6 +220,10 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
 			      struct netlink_ext_ack *extack);
 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+				 bool offload, bool trap);
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+				     unsigned long *activity);
 
 /* caller is holding rcu or rtnl; no reference taken to nexthop */
 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -212,7 +278,7 @@ static inline bool nexthop_is_multipath(const struct nexthop *nh)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		return nh_grp->mpath;
+		return nh_grp->is_multipath;
 	}
 	return false;
 }
@@ -227,7 +293,7 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		if (nh_grp->mpath)
+		if (nh_grp->is_multipath)
 			rc = nh_grp->num_nh;
 	}
 
@@ -308,7 +374,7 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		if (nh_grp->mpath) {
+		if (nh_grp->is_multipath) {
 			nh = nexthop_mpath_select(nh_grp, nhsel);
 			if (!nh)
 				return NULL;
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15b1b30f454e..f5c1bee0cd6a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -188,4 +188,13 @@ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
 						  *offload);
 void taprio_offload_free(struct tc_taprio_qopt_offload *offload);
 
+/* Ensure skb_mstamp_ns, which might have been populated with the txtime, is
+ * not mistaken for a software timestamp, because this will otherwise prevent
+ * the dispatch of hardware timestamps to the socket.
+ */
+static inline void skb_txtime_consumed(struct sk_buff *skb)
+{
+	skb->tstamp = ktime_set(0, 0);
+}
+
 #endif
diff --git a/include/net/psample.h b/include/net/psample.h
index 68ae16bb0a4a..e328c5127757 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -14,6 +14,19 @@ struct psample_group {
 	struct rcu_head rcu;
 };
 
+struct psample_metadata {
+	u32 trunc_size;
+	int in_ifindex;
+	int out_ifindex;
+	u16 out_tc;
+	u64 out_tc_occ;	/* bytes */
+	u64 latency;	/* nanoseconds */
+	u8 out_tc_valid:1,
+	   out_tc_occ_valid:1,
+	   latency_valid:1,
+	   unused:5;
+};
+
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
 void psample_group_take(struct psample_group *group);
 void psample_group_put(struct psample_group *group);
@@ -21,15 +34,13 @@ void psample_group_put(struct psample_group *group);
 #if IS_ENABLED(CONFIG_PSAMPLE)
 
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
-			   u32 trunc_size, int in_ifindex, int out_ifindex,
-			   u32 sample_rate);
+			   u32 sample_rate, const struct psample_metadata *md);
 
 #else
 
 static inline void psample_sample_packet(struct psample_group *group,
-					 struct sk_buff *skb, u32 trunc_size,
-					 int in_ifindex, int out_ifindex,
-					 u32 sample_rate)
+					 struct sk_buff *skb, u32 sample_rate,
+					 const struct psample_metadata *md)
 {
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2d6eb60c58c8..f7a6e14491fb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1242,6 +1242,20 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
 	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
+struct psched_pktrate {
+	u64	rate_pkts_ps; /* packets per second */
+	u32	mult;
+	u8	shift;
+};
+
+static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
+				  unsigned int pkt_num)
+{
+	return ((u64)pkt_num * r->mult) >> r->shift;
+}
+
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+
 /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
  * The fast path only needs to access filter list and to update stats
  */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index b7fc7d0f54e2..8c3218177136 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -68,6 +68,7 @@ enum switchdev_obj_id {
 };
 
 struct switchdev_obj {
+	struct list_head list;
 	struct net_device *orig_dev;
 	enum switchdev_obj_id id;
 	u32 flags;
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 6d1e26b709b5..72649512dcdd 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -10,10 +10,13 @@ struct tcf_police_params {
 	s64			tcfp_burst;
 	u32			tcfp_mtu;
 	s64			tcfp_mtu_ptoks;
+	s64			tcfp_pkt_burst;
 	struct psched_ratecfg	rate;
 	bool			rate_present;
 	struct psched_ratecfg	peak;
 	bool			peak_present;
+	struct psched_pktrate	ppsrate;
+	bool			pps_present;
 	struct rcu_head rcu;
 };
 
@@ -24,6 +27,7 @@ struct tcf_police {
 	spinlock_t		tcfp_lock ____cacheline_aligned_in_smp;
 	s64			tcfp_toks;
 	s64			tcfp_ptoks;
+	s64			tcfp_pkttoks;
 	s64			tcfp_t_c;
 };
 
@@ -97,6 +101,54 @@ static inline u32 tcf_police_burst(const struct tc_action *act)
 	return burst;
 }
 
+static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
+{
+	struct tcf_police *police = to_police(act);
+	struct tcf_police_params *params;
+
+	params = rcu_dereference_protected(police->params,
+					   lockdep_is_held(&police->tcf_lock));
+	return params->ppsrate.rate_pkts_ps;
+}
+
+static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
+{
+	struct tcf_police *police = to_police(act);
+	struct tcf_police_params *params;
+	u32 burst;
+
+	params = rcu_dereference_protected(police->params,
+					   lockdep_is_held(&police->tcf_lock));
+
+	/*
+	 *  "rate" pkts     "burst" nanoseconds
+	 *  ------------ *  -------------------
+	 *    1 second          2^6 ticks
+	 *
+	 * ------------------------------------
+	 *        NSEC_PER_SEC nanoseconds
+	 *        ------------------------
+	 *              2^6 ticks
+	 *
+	 *    "rate" pkts    "burst" nanoseconds            2^6 ticks
+	 *  = ------------ * ------------------- * ------------------------
+	 *      1 second          2^6 ticks        NSEC_PER_SEC nanoseconds
+	 *
+	 *   "rate" * "burst"
+	 * = ---------------- pkts/nanosecond
+	 *    NSEC_PER_SEC^2
+	 *
+	 *
+	 *   "rate" * "burst"
+	 * = ---------------- pkts/second
+	 *     NSEC_PER_SEC
+	 */
+	burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps,
+			NSEC_PER_SEC);
+
+	return burst;
+}
+
 static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
 {
 	struct tcf_police *police = to_police(act);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 963cd86d12dd..075de26f449d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -883,36 +883,11 @@ struct tcp_skb_cb {
 			struct inet6_skb_parm	h6;
 #endif
 		} header;	/* For incoming skbs */
-		struct {
-			__u32 flags;
-			struct sock *sk_redir;
-			void *data_end;
-		} bpf;
 	};
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
-	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
-static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
-{
-	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
-}
-
-static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
-{
-	return TCP_SKB_CB(skb)->bpf.sk_redir;
-}
-
-static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
-{
-	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-}
-
 extern const struct inet_connection_sock_af_ops ipv4_specific;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
 	__MODULE_INFO(alias, alias_userspace, name);		\
 	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+#ifdef CONFIG_NET_SOCK_MSG
 struct sk_msg;
 struct sk_psock;
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
-#else
-static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
-{
-}
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
 
-#ifdef CONFIG_NET_SOCK_MSG
 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 			  int flags);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		      struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
 
+#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif
+
 #ifdef CONFIG_CGROUP_BPF
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
diff --git a/include/net/udp.h b/include/net/udp.h
index a132a02b2f2c..d4d064c59232 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 	return segs;
 }
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-#endif /* BPF_STREAM_PARSER */
+#endif
 
 #endif	/* _UDP_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..9c0722c6d7ac 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs;
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	xs = READ_ONCE(m->xsk_map[key]);
-	return xs;
-}
-
 #else
 
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
 {
 }
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	return NULL;
-}
-
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 425ff29d9389..68cdc7ceaf4d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -51,6 +51,7 @@
  */
 
 /* Reserve some destination PGIDs at the end of the range:
+ * PGID_BLACKHOLE: used for not forwarding the frames
  * PGID_CPU: used for whitelisting certain MAC addresses, such as the addresses
  *           of the switch port net devices, towards the CPU port module.
  * PGID_UC: the flooding destinations for unknown unicast traffic.
@@ -59,6 +60,7 @@
  * PGID_MCIPV6: the flooding destinations for IPv6 multicast traffic.
  * PGID_BC: the flooding destinations for broadcast traffic.
  */
+#define PGID_BLACKHOLE			57
 #define PGID_CPU			58
 #define PGID_UC				59
 #define PGID_MC				60
@@ -73,7 +75,7 @@
 
 #define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid)	\
 	for ((pgid) = (ocelot)->num_phys_ports + 1;		\
-	     (pgid) < PGID_CPU;					\
+	     (pgid) < PGID_BLACKHOLE;				\
 	     (pgid)++)
 
 #define for_each_aggr_pgid(ocelot, pgid)			\
@@ -611,6 +613,11 @@ struct ocelot_port {
 
 	struct net_device		*bond;
 	bool				lag_tx_active;
+
+	u16				mrp_ring_id;
+
+	struct net_device		*bridge;
+	u8				stp_state;
 };
 
 struct ocelot {
@@ -630,10 +637,6 @@ struct ocelot {
 	int				num_frame_refs;
 	int				num_mact_rows;
 
-	struct net_device		*hw_bridge_dev;
-	u16				bridge_mask;
-	u16				bridge_fwd_mask;
-
 	struct ocelot_port		**ports;
 
 	u8				base_mac[ETH_ALEN];
@@ -679,12 +682,6 @@ struct ocelot {
 	/* Protects the PTP clock */
 	spinlock_t			ptp_clock_lock;
 	struct ptp_pin_desc		ptp_pins[OCELOT_PTP_PINS_NUM];
-
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	u16				mrp_ring_id;
-	struct net_device		*mrp_p_port;
-	struct net_device		*mrp_s_port;
-#endif
 };
 
 struct ocelot_policer {
@@ -806,10 +803,10 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
 				 struct switchdev_brport_flags val);
 void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
 			      struct switchdev_brport_flags val);
-int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
-			    struct net_device *bridge);
-int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
 			     struct net_device *bridge);
+void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+			      struct net_device *bridge);
 int ocelot_fdb_dump(struct ocelot *ocelot, int port,
 		    dsa_fdb_dump_cb_t *cb, void *data);
 int ocelot_fdb_add(struct ocelot *ocelot, int port,
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 76a97176ab81..fcad3645a70b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
 };
 #endif /* __DEVMAP_OBJ_TYPE */
 
-#define devmap_ifindex(tgt, map)				\
-	(((map->map_type == BPF_MAP_TYPE_DEVMAP ||	\
-		  map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
-	  ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
-
 DECLARE_EVENT_CLASS(xdp_redirect_template,
 
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
 
-	TP_ARGS(dev, xdp, tgt, err, map, index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
 
 	TP_STRUCT__entry(
 		__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
 	),
 
 	TP_fast_assign(
+		u32 ifindex = 0, map_index = index;
+
+		if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+			ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+		} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+			ifindex = index;
+			map_index = 0;
+		}
+
 		__entry->prog_id	= xdp->aux->id;
 		__entry->act		= XDP_REDIRECT;
 		__entry->ifindex	= dev->ifindex;
 		__entry->err		= err;
-		__entry->to_ifindex	= map ? devmap_ifindex(tgt, map) :
-						index;
-		__entry->map_id		= map ? map->id : 0;
-		__entry->map_index	= map ? index : 0;
+		__entry->to_ifindex	= ifindex;
+		__entry->map_id		= map_id;
+		__entry->map_index	= map_index;
 	),
 
 	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
-#define _trace_xdp_redirect(dev, xdp, to)				\
-	 trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
+#define _trace_xdp_redirect(dev, xdp, to)						\
+	 trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
 
-#define _trace_xdp_redirect_err(dev, xdp, to, err)			\
-	 trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
+#define _trace_xdp_redirect_err(dev, xdp, to, err)					\
+	 trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
 
-#define _trace_xdp_redirect_map(dev, xdp, to, map, index)		\
-	 trace_xdp_redirect(dev, xdp, to, 0, map, index)
+#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
+	 trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
 
-#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err)	\
-	 trace_xdp_redirect_err(dev, xdp, to, err, map, index)
+#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
+	 trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
 
 /* not used anymore, but kept around so as not to break old programs */
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 TRACE_EVENT(xdp_cpumap_kthread,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4ba4ef0ff63a..008edc1dc8c1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
 	} map;
 };
 
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ *	Description
+ *		Create a map and return a file descriptor that refers to the
+ *		map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ *		is automatically enabled for the new file descriptor.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ *	Description
+ *		Look up an element with a given *key* in the map referred to
+ *		by the file descriptor *map_fd*.
+ *
+ *		The *flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ *	Description
+ *		Create or update an element (key/value pair) in a specified map.
+ *
+ *		The *flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create a new element or update an existing element.
+ *		**BPF_NOEXIST**
+ *			Create a new element only if it did not exist.
+ *		**BPF_EXIST**
+ *			Update an existing element.
+ *		**BPF_F_LOCK**
+ *			Update a spin_lock-ed map element.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ *		**E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ *		**E2BIG**
+ *			The number of elements in the map reached the
+ *			*max_entries* limit specified at map creation time.
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ *	Description
+ *		Look up and delete an element by key in a specified map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ *	Description
+ *		Look up an element by key in a specified map and return the key
+ *		of the next element. Can be used to iterate over all elements
+ *		in the map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		The following cases can be used to iterate over all elements of
+ *		the map:
+ *
+ *		* If *key* is not found, the operation returns zero and sets
+ *		  the *next_key* pointer to the key of the first element.
+ *		* If *key* is found, the operation returns zero and sets the
+ *		  *next_key* pointer to the key of the next element.
+ *		* If *key* is the last element, returns -1 and *errno* is set
+ *		  to **ENOENT**.
+ *
+ *		May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ *		**EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ *	Description
+ *		Verify and load an eBPF program, returning a new file
+ *		descriptor associated with the program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ *		The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ *		automatically enabled for the new file descriptor.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ *	Description
+ *		Pin an eBPF program or map referred by the specified *bpf_fd*
+ *		to the provided *pathname* on the filesystem.
+ *
+ *		The *pathname* argument must not contain a dot (".").
+ *
+ *		On success, *pathname* retains a reference to the eBPF object,
+ *		preventing deallocation of the object when the original
+ *		*bpf_fd* is closed. This allow the eBPF object to live beyond
+ *		**close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ *		process.
+ *
+ *		Applying **unlink**\ (2) or similar calls to the *pathname*
+ *		unpins the object from the filesystem, removing the reference.
+ *		If no other file descriptors or filesystem nodes refer to the
+ *		same object, it will be deallocated (see NOTES).
+ *
+ *		The filesystem type for the parent directory of *pathname* must
+ *		be **BPF_FS_MAGIC**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_OBJ_GET
+ *	Description
+ *		Open a file descriptor for the eBPF object pinned to the
+ *		specified *pathname*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook.
+ *
+ *		The *attach_type* specifies the eBPF attachment point to
+ *		attach the program to, and must be one of *bpf_attach_type*
+ *		(see below).
+ *
+ *		The *attach_bpf_fd* must be a valid file descriptor for a
+ *		loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ *		or sock_ops type corresponding to the specified *attach_type*.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_TYPE_SK_SKB**,
+ *		**BPF_PROG_TYPE_SK_MSG**
+ *
+ *			eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ *	Description
+ *		Detach the eBPF program associated with the *target_fd* at the
+ *		hook specified by *attach_type*. The program must have been
+ *		previously attached using **BPF_PROG_ATTACH**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ *	Description
+ *		Run the eBPF program associated with the *prog_fd* a *repeat*
+ *		number of times against a provided program context *ctx_in* and
+ *		data *data_in*, and return the modified program context
+ *		*ctx_out*, *data_out* (for example, packet data), result of the
+ *		execution *retval*, and *duration* of the test run.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		**ENOSPC**
+ *			Either *data_size_out* or *ctx_size_out* is too small.
+ *		**ENOTSUPP**
+ *			This command is not supported by the program type of
+ *			the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF program currently loaded into the kernel.
+ *
+ *		Looks for the eBPF program with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF programs
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF map currently loaded into the kernel.
+ *
+ *		Looks for the eBPF map with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF maps
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF program corresponding to
+ *		*prog_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF map corresponding to
+ *		*map_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ *	Description
+ *		Obtain information about the eBPF object corresponding to
+ *		*bpf_fd*.
+ *
+ *		Populates up to *info_len* bytes of *info*, which will be in
+ *		one of the following formats depending on the eBPF object type
+ *		of *bpf_fd*:
+ *
+ *		* **struct bpf_prog_info**
+ *		* **struct bpf_map_info**
+ *		* **struct bpf_btf_info**
+ *		* **struct bpf_link_info**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		specified *attach_type* hook.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_QUERY** always fetches the number of programs
+ *		attached and the *attach_flags* which were used to attach those
+ *		programs. Additionally, if *prog_ids* is nonzero and the number
+ *		of attached programs is less than *prog_cnt*, populates
+ *		*prog_ids* with the eBPF program ids of the programs attached
+ *		at *target_fd*.
+ *
+ *		The following flags may alter the result:
+ *
+ *		**BPF_F_QUERY_EFFECTIVE**
+ *			Only return information regarding programs which are
+ *			currently effective at the specified *target_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ *	Description
+ *		Attach an eBPF program to a tracepoint *name* to access kernel
+ *		internal arguments of the tracepoint in their raw form.
+ *
+ *		The *prog_fd* must be a valid file descriptor associated with
+ *		a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ *		No ABI guarantees are made about the content of tracepoint
+ *		arguments exposed to the corresponding eBPF program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ *	Description
+ *		Verify and load BPF Type Format (BTF) metadata into the kernel,
+ *		returning a new file descriptor associated with the metadata.
+ *		BTF is described in more detail at
+ *		https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ *		The *btf* parameter must point to valid memory providing
+ *		*btf_size* bytes of BTF binary metadata.
+ *
+ *		The returned file descriptor can be passed to other **bpf**\ ()
+ *		subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ *		associate the BTF with those objects.
+ *
+ *		Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ *		parameters to specify a *btf_log_buf*, *btf_log_size* and
+ *		*btf_log_level* which allow the kernel to return freeform log
+ *		output regarding the BTF verification process.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the BPF Type Format (BTF)
+ *		corresponding to *btf_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		target process identified by *pid* and *fd*.
+ *
+ *		If the *pid* and *fd* are associated with a tracepoint, kprobe
+ *		or uprobe perf event, then the *prog_id* and *fd_type* will
+ *		be populated with the eBPF program id and file descriptor type
+ *		of type **bpf_task_fd_type**. If associated with a kprobe or
+ *		uprobe, the  *probe_offset* and *probe_addr* will also be
+ *		populated. Optionally, if *buf* is provided, then up to
+ *		*buf_len* bytes of *buf* will be populated with the name of
+ *		the tracepoint, kprobe or uprobe.
+ *
+ *		The resulting *prog_id* may be introspected in deeper detail
+ *		using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ *	Description
+ *		Look up an element with the given *key* in the map referred to
+ *		by the file descriptor *fd*, and if found, delete the element.
+ *
+ *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ *		implement this command as a "pop" operation, deleting the top
+ *		element rather than one corresponding to *key*.
+ *		The *key* and *key_len* parameters should be zeroed when
+ *		issuing this operation for these map types.
+ *
+ *		This command is only valid for the following map types:
+ *		* **BPF_MAP_TYPE_QUEUE**
+ *		* **BPF_MAP_TYPE_STACK**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ *	Description
+ *		Freeze the permissions of the specified map.
+ *
+ *		Write permissions may be frozen by passing zero *flags*.
+ *		Upon success, no future syscall invocations may alter the
+ *		map state of *map_fd*. Write operations from eBPF programs
+ *		are still possible for a frozen map.
+ *
+ *		Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ *	Description
+ *		Fetch the next BPF Type Format (BTF) object currently loaded
+ *		into the kernel.
+ *
+ *		Looks for the BTF object with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other BTF objects
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ *	Description
+ *		Iterate and fetch multiple elements in a map.
+ *
+ *		Two opaque values are used to manage batch operations,
+ *		*in_batch* and *out_batch*. Initially, *in_batch* must be set
+ *		to NULL to begin the batched operation. After each subsequent
+ *		**BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ *		*out_batch* as the *in_batch* for the next operation to
+ *		continue iteration from the current point.
+ *
+ *		The *keys* and *values* are output parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are copied into the
+ *		user buffer, with the keys copied into *keys* and the values
+ *		copied into the corresponding indices in *values*.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **ENOSPC** to indicate that *keys* or
+ *		*values* is too small to dump an entire bucket during
+ *		iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ *	Description
+ *		Iterate and delete all elements in a map.
+ *
+ *		This operation has the same behavior as
+ *		**BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ *		* Every element that is successfully returned is also deleted
+ *		  from the map. This is at least *count* elements. Note that
+ *		  *count* is both an input and an output parameter.
+ *		* Upon returning with *errno* set to **EFAULT**, up to
+ *		  *count* elements may be deleted without returning the keys
+ *		  and values of the deleted elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ *	Description
+ *		Update multiple elements in a map by *key*.
+ *
+ *		The *keys* and *values* are input parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially updated to the
+ *		value in the corresponding index in *values*. The *in_batch*
+ *		and *out_batch* parameters are ignored and should be zeroed.
+ *
+ *		The *elem_flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create new elements or update a existing elements.
+ *		**BPF_NOEXIST**
+ *			Create new elements only if they do not exist.
+ *		**BPF_EXIST**
+ *			Update existing elements.
+ *		**BPF_F_LOCK**
+ *			Update spin_lock-ed map elements. This must be
+ *			specified if the map value contains a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ *		**E2BIG**. **E2BIG** indicates that the number of elements in
+ *		the map reached the *max_entries* limit specified at map
+ *		creation time.
+ *
+ *		May set *errno* to one of the following error codes under
+ *		specific circumstances:
+ *
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ *	Description
+ *		Delete multiple elements in a map by *key*.
+ *
+ *		The *keys* parameter is an input parameter which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially deleted. The
+ *		*in_batch*, *out_batch*, and *values* parameters are ignored
+ *		and should be zeroed.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements. If
+ *		*errno* is **EFAULT**, up to *count* elements may be been
+ *		deleted.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook and return a file descriptor handle for
+ *		managing the link.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ *	Description
+ *		Update the eBPF program in the specified *link_fd* to
+ *		*new_prog_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF Link corresponding to
+ *		*link_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF link currently loaded into the kernel.
+ *
+ *		Looks for the eBPF link with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF links
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ *	Description
+ *		Enable eBPF runtime statistics gathering.
+ *
+ *		Runtime statistics gathering for the eBPF runtime is disabled
+ *		by default to minimize the corresponding performance overhead.
+ *		This command enables statistics globally.
+ *
+ *		Multiple programs may independently enable statistics.
+ *		After gathering the desired statistics, eBPF runtime statistics
+ *		may be disabled again by calling **close**\ (2) for the file
+ *		descriptor returned by this function. Statistics will only be
+ *		disabled system-wide when all outstanding file descriptors
+ *		returned by prior calls for this subcommand are closed.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ *	Description
+ *		Create an iterator on top of the specified *link_fd* (as
+ *		previously created using **BPF_LINK_CREATE**) and return a
+ *		file descriptor that can be used to trigger the iteration.
+ *
+ *		If the resulting file descriptor is pinned to the filesystem
+ *		using  **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ *		for that path will trigger the iterator to read kernel state
+ *		using the eBPF program attached to *link_fd*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ *	Description
+ *		Forcefully detach the specified *link_fd* from its
+ *		corresponding attachment point.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ *	Description
+ *		Bind a map to the lifetime of an eBPF program.
+ *
+ *		The map identified by *map_fd* is bound to the program
+ *		identified by *prog_fd* and only released when *prog_fd* is
+ *		released. This may be used in cases where metadata should be
+ *		associated with a program which otherwise does not contain any
+ *		references to the map (for example, embedded in the eBPF
+ *		program instructions).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * NOTES
+ *	eBPF objects (maps and programs) can be shared between processes.
+ *
+ *	* After **fork**\ (2), the child inherits file descriptors
+ *	  referring to the same eBPF objects.
+ *	* File descriptors referring to eBPF objects can be transferred over
+ *	  **unix**\ (7) domain sockets.
+ *	* File descriptors referring to eBPF objects can be duplicated in the
+ *	  usual way, using **dup**\ (2) and similar calls.
+ *	* File descriptors referring to eBPF objects can be pinned to the
+ *	  filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ *	An eBPF object is deallocated only after all file descriptors referring
+ *	to the object have been closed and no references remain pinned to the
+ *	filesystem or attached (for example, bound to a program or device).
+ */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
 	BPF_MAP_LOOKUP_ELEM,
@@ -393,6 +1103,15 @@ enum bpf_link_type {
  *                   is struct/union.
  */
 #define BPF_PSEUDO_BTF_ID	3
+/* insn[0].src_reg:  BPF_PSEUDO_FUNC
+ * insn[0].imm:      insn offset to the func
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the function
+ * verifier type:    PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC		4
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
@@ -720,7 +1439,7 @@ union bpf_attr {
  * parsed and used to produce a manual page. The workflow is the following,
  * and requires the rst2man utility:
  *
- *     $ ./scripts/bpf_helpers_doc.py \
+ *     $ ./scripts/bpf_doc.py \
  *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
  *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
  *     $ man /tmp/bpf-helpers.7
@@ -1765,6 +2484,10 @@ union bpf_attr {
  *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
  *		  type; *len* is the length of the inner MAC header.
  *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *		  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *		  L2 type as Ethernet.
+ *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
  * 		previously done by the verifier are invalidated and must be
@@ -3915,6 +4638,34 @@ union bpf_attr {
  *		* **BPF_MTU_CHK_RET_FRAG_NEEDED**
  *		* **BPF_MTU_CHK_RET_SEGS_TOOBIG**
  *
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For each element in **map**, call **callback_fn** function with
+ *		**map**, **callback_ctx** and other map-specific parameters.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0.
+ *
+ *		The following are a list of supported map types and their
+ *		respective expected callback signatures:
+ *
+ *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ *		For per_cpu maps, the map_value is the value on the cpu where the
+ *		bpf_prog is running.
+ *
+ *		If **callback_fn** return 0, the helper will continue to the next
+ *		element. If return value is 1, the helper will skip the rest of
+ *		elements and return. Other return values are not used now.
+ *
+ *	Return
+ *		The number of traversed map elements for success, **-EINVAL** for
+ *		invalid **flags**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4081,6 +4832,7 @@ union bpf_attr {
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
+	FN(for_each_map_elem),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4174,6 +4926,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
 	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
+	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 };
 
 enum {
@@ -5211,7 +5964,10 @@ struct bpf_pidns_info {
 
 /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
 struct bpf_sk_lookup {
-	__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+	union {
+		__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+		__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+	};
 
 	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
 	};
 };
 
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
@@ -72,7 +72,8 @@ struct btf_type {
 #define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
 #define BTF_KIND_VAR		14	/* Variable	*/
 #define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_MAX		BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT		16	/* Floating point	*/
+#define BTF_KIND_MAX		BTF_KIND_FLOAT
 #define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h
index 7239aa9c0766..8df2d9934bcd 100644
--- a/include/uapi/linux/if_fddi.h
+++ b/include/uapi/linux/if_fddi.h
@@ -9,7 +9,7 @@
  * Version:	@(#)if_fddi.h	1.0.3	Oct  6 2018
  *
  * Author:	Lawrence V. Stefani, <stefani@yahoo.com>
- * Maintainer:	Maciej W. Rozycki, <macro@linux-mips.org>
+ * Maintainer:	Maciej W. Rozycki, <macro@orcam.me.uk>
  *
  *		if_fddi.h is based on previous if_ether.h and if_tr.h work by
  *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index 2d4a1e784cf0..d8ffa8c9ca78 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -21,7 +21,10 @@ struct nexthop_grp {
 };
 
 enum {
-	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */
+	NEXTHOP_GRP_TYPE_MPATH,  /* hash-threshold nexthop group
+				  * default type if not specified
+				  */
+	NEXTHOP_GRP_TYPE_RES,    /* resilient nexthop group */
 	__NEXTHOP_GRP_TYPE_MAX,
 };
 
@@ -52,8 +55,50 @@ enum {
 	NHA_FDB,	/* flag; nexthop belongs to a bridge fdb */
 	/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
 
+	/* nested; resilient nexthop group attributes */
+	NHA_RES_GROUP,
+	/* nested; nexthop bucket attributes */
+	NHA_RES_BUCKET,
+
 	__NHA_MAX,
 };
 
 #define NHA_MAX	(__NHA_MAX - 1)
+
+enum {
+	NHA_RES_GROUP_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+	/* u16; number of nexthop buckets in a resilient nexthop group */
+	NHA_RES_GROUP_BUCKETS,
+	/* clock_t as u32; nexthop bucket idle timer (per-group) */
+	NHA_RES_GROUP_IDLE_TIMER,
+	/* clock_t as u32; nexthop unbalanced timer */
+	NHA_RES_GROUP_UNBALANCED_TIMER,
+	/* clock_t as u64; nexthop unbalanced time */
+	NHA_RES_GROUP_UNBALANCED_TIME,
+
+	__NHA_RES_GROUP_MAX,
+};
+
+#define NHA_RES_GROUP_MAX	(__NHA_RES_GROUP_MAX - 1)
+
+enum {
+	NHA_RES_BUCKET_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
+
+	/* u16; nexthop bucket index */
+	NHA_RES_BUCKET_INDEX,
+	/* clock_t as u64; nexthop bucket idle time */
+	NHA_RES_BUCKET_IDLE_TIME,
+	/* u32; nexthop id assigned to the nexthop bucket */
+	NHA_RES_BUCKET_NH_ID,
+
+	__NHA_RES_BUCKET_MAX,
+};
+
+#define NHA_RES_BUCKET_MAX	(__NHA_RES_BUCKET_MAX - 1)
+
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7ea59cfe1fa7..025c40fef93d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -190,6 +190,8 @@ enum {
 	TCA_POLICE_PAD,
 	TCA_POLICE_RATE64,
 	TCA_POLICE_PEAKRATE64,
+	TCA_POLICE_PKTRATE64,
+	TCA_POLICE_PKTBURST64,
 	__TCA_POLICE_MAX
 #define TCA_POLICE_RESULT TCA_POLICE_RESULT
 };
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index bff5032c98df..e585db5bf2d2 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -13,6 +13,13 @@ enum {
 	PSAMPLE_ATTR_GROUP_REFCOUNT,
 	PSAMPLE_ATTR_TUNNEL,
 
+	PSAMPLE_ATTR_PAD,
+	PSAMPLE_ATTR_OUT_TC,		/* u16 */
+	PSAMPLE_ATTR_OUT_TC_OCC,	/* u64, bytes */
+	PSAMPLE_ATTR_LATENCY,		/* u64, nanoseconds */
+	PSAMPLE_ATTR_TIMESTAMP,		/* u64, nanoseconds */
+	PSAMPLE_ATTR_PROTO,		/* u16 */
+
 	__PSAMPLE_ATTR_MAX
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 91e4ca064d61..5888492a5257 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -178,6 +178,13 @@ enum {
 	RTM_GETVLAN,
 #define RTM_GETVLAN	RTM_GETVLAN
 
+	RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET	RTM_NEWNEXTHOPBUCKET
+	RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET	RTM_DELNEXTHOPBUCKET
+	RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET	RTM_GETNEXTHOPBUCKET
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -283,6 +290,7 @@ enum {
 #define RTPROT_MROUTED		17	/* Multicast daemon */
 #define RTPROT_KEEPALIVED	18	/* Keepalived daemon */
 #define RTPROT_BABEL		42	/* Babel daemon */
+#define RTPROT_OPENR		99	/* Open Routing (Open/R) Routes */
 #define RTPROT_BGP		186	/* BGP Routes */
 #define RTPROT_ISIS		187	/* ISIS Routes */
 #define RTPROT_OSPF		188	/* OSPF Routes */
diff --git a/init/Kconfig b/init/Kconfig
index 5f5c776ef192..5deae45b8d81 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1708,6 +1708,7 @@ config BPF_SYSCALL
 	select BPF
 	select IRQ_WORK
 	select TASKS_TRACE_RCU
+	select NET_SOCK_MSG if INET
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index d1249340fd6b..7f33098ca63f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
 endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1f8453343bf2..463d25e1e67e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
 };
 
+static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
+				   void *callback_ctx, u64 flags)
+{
+	u32 i, key, num_elems = 0;
+	struct bpf_array *array;
+	bool is_percpu;
+	u64 ret = 0;
+	void *val;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+	array = container_of(map, struct bpf_array, map);
+	if (is_percpu)
+		migrate_disable();
+	for (i = 0; i < map->max_entries; i++) {
+		if (is_percpu)
+			val = this_cpu_ptr(array->pptrs[i]);
+		else
+			val = array->value + array->elem_size * i;
+		num_elems++;
+		key = i;
+		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
+					(u64)(long)&key, (u64)(long)val,
+					(u64)(long)callback_ctx, 0);
+		/* return value: 0 - continue, 1 - stop and return */
+		if (ret)
+			break;
+	}
+
+	if (is_percpu)
+		migrate_enable();
+	return num_elems;
+}
+
 static int array_map_btf_id;
 const struct bpf_map_ops array_map_ops = {
 	.map_meta_equal = array_map_meta_equal,
@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
 	.map_check_btf = array_map_check_btf,
 	.map_lookup_batch = generic_map_lookup_batch,
 	.map_update_batch = generic_map_update_batch,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
 	.map_btf_id = &array_map_btf_id,
 	.iter_seq_info = &iter_seq_info,
@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_delete_elem = array_map_delete_elem,
 	.map_seq_show_elem = percpu_array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
 	.map_btf_id = &percpu_array_map_btf_id,
 	.iter_seq_info = &iter_seq_info,
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index b58b2efb9b43..2921ca39a93e 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 
 static int inode_storage_map_btf_id;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index a0d9eade9c80..931870f9cf56 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 	 */
 	return ret == 0 ? 0 : -EAGAIN;
 }
+
+BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
+	   void *, callback_ctx, u64, flags)
+{
+	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
+}
+
+const struct bpf_func_proto bpf_for_each_map_elem_proto = {
+	.func		= bpf_for_each_map_elem,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index dd5aedee99e7..b305270b7a4b 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage *local_storage;
 	bool free_local_storage = false;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_storage(selem)))
 		/* selem has already been unlinked from sk */
 		return;
 
 	local_storage = rcu_dereference(selem->local_storage);
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, true);
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
 	if (free_local_storage)
 		kfree_rcu(local_storage, rcu);
@@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_map(selem)))
 		/* selem has already be unlinked from smap */
@@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 
 	smap = rcu_dereference(SDATA(selem)->smap);
 	b = select_bucket(smap, selem);
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	if (likely(selem_linked_to_map(selem)))
 		hlist_del_init_rcu(&selem->map_node);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+	unsigned long flags;
 
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 	hlist_add_head_rcu(&selem->map_node, &b->list);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 
 	sdata = SDATA(selem);
 	if (cacheit_lockit) {
+		unsigned long flags;
+
 		/* spinlock is needed to avoid racing with the
 		 * parallel delete.  Otherwise, publishing an already
 		 * deleted sdata to the cache will become a use-after-free
 		 * problem in the next bpf_local_storage_lookup().
 		 */
-		raw_spin_lock_bh(&local_storage->lock);
+		raw_spin_lock_irqsave(&local_storage->lock, flags);
 		if (selem_linked_to_storage(selem))
 			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
 					   sdata);
-		raw_spin_unlock_bh(&local_storage->lock);
+		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	}
 
 	return sdata;
@@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
+	unsigned long flags;
 	int err;
 
 	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
 	if (unlikely(hlist_empty(&local_storage->list))) {
@@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	}
 
 unlock:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return SDATA(selem);
 
 unlock_err:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return ERR_PTR(err);
 }
 
@@ -468,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 	spin_unlock(&cache->idx_lock);
 }
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter)
 {
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage_map_bucket *b;
@@ -497,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 		while ((selem = hlist_entry_safe(
 				rcu_dereference_raw(hlist_first_rcu(&b->list)),
 				struct bpf_local_storage_elem, map_node))) {
+			if (busy_counter) {
+				migrate_disable();
+				__this_cpu_inc(*busy_counter);
+			}
 			bpf_selem_unlink(selem);
+			if (busy_counter) {
+				__this_cpu_dec(*busy_counter);
+				migrate_enable();
+			}
 			cond_resched_rcu();
 		}
 		rcu_read_unlock();
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 1622a44d1617..9829f381b51c 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
-	case BPF_FUNC_task_storage_get:
-		return &bpf_task_storage_get_proto;
-	case BPF_FUNC_task_storage_delete:
-		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index e0da0258b732..fd3c74ef608e 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -15,21 +15,41 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/filter.h>
 #include <uapi/linux/btf.h>
-#include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
+DEFINE_PER_CPU(int, bpf_task_storage_busy);
+
+static void bpf_task_storage_lock(void)
+{
+	migrate_disable();
+	__this_cpu_inc(bpf_task_storage_busy);
+}
+
+static void bpf_task_storage_unlock(void)
+{
+	__this_cpu_dec(bpf_task_storage_busy);
+	migrate_enable();
+}
+
+static bool bpf_task_storage_trylock(void)
+{
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+		__this_cpu_dec(bpf_task_storage_busy);
+		migrate_enable();
+		return false;
+	}
+	return true;
+}
+
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
-	struct bpf_storage_blob *bsb;
 
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
-	return &bsb->storage;
+	return &task->bpf_storage;
 }
 
 static struct bpf_local_storage_data *
@@ -38,13 +58,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
 {
 	struct bpf_local_storage *task_storage;
 	struct bpf_local_storage_map *smap;
-	struct bpf_storage_blob *bsb;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
 
-	task_storage = rcu_dereference(bsb->storage);
+	task_storage = rcu_dereference(task->bpf_storage);
 	if (!task_storage)
 		return NULL;
 
@@ -57,16 +72,12 @@ void bpf_task_storage_free(struct task_struct *task)
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
 	bool free_task_storage = false;
-	struct bpf_storage_blob *bsb;
 	struct hlist_node *n;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return;
+	unsigned long flags;
 
 	rcu_read_lock();
 
-	local_storage = rcu_dereference(bsb->storage);
+	local_storage = rcu_dereference(task->bpf_storage);
 	if (!local_storage) {
 		rcu_read_unlock();
 		return;
@@ -81,7 +92,8 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
-	raw_spin_lock_bh(&local_storage->lock);
+	bpf_task_storage_lock();
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
 		 * local_storage.
@@ -90,7 +102,8 @@ void bpf_task_storage_free(struct task_struct *task)
 		free_task_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, false);
 	}
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	bpf_task_storage_unlock();
 	rcu_read_unlock();
 
 	/* free_task_storage should always be true as long as
@@ -123,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = task_storage_lookup(task, map, true);
+	bpf_task_storage_unlock();
 	put_pid(pid);
 	return sdata ? sdata->data : NULL;
 out:
@@ -150,13 +165,15 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	 */
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	task = pid_task(pid, PIDTYPE_PID);
-	if (!task || !task_storage_ptr(task)) {
+	if (!task) {
 		err = -ENOENT;
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags);
+	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
 out:
@@ -199,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	err = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
 out:
 	put_pid(pid);
 	return err;
@@ -213,44 +232,47 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
 
-	/* explicitly check that the task_storage_ptr is not
-	 * NULL as task_storage_lookup returns NULL in this case and
-	 * bpf_local_storage_update expects the owner to have a
-	 * valid storage pointer.
-	 */
-	if (!task || !task_storage_ptr(task))
+	if (!task)
+		return (unsigned long)NULL;
+
+	if (!bpf_task_storage_trylock())
 		return (unsigned long)NULL;
 
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
-		return (unsigned long)sdata->data;
+		goto unlock;
 
-	/* This helper must only be called from places where the lifetime of the task
-	 * is guaranteed. Either by being refcounted or by being protected
-	 * by an RCU read-side critical section.
-	 */
-	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+	/* only allocate new storage, when the task is refcounted */
+	if (refcount_read(&task->usage) &&
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
-		return IS_ERR(sdata) ? (unsigned long)NULL :
-					     (unsigned long)sdata->data;
-	}
 
-	return (unsigned long)NULL;
+unlock:
+	bpf_task_storage_unlock();
+	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
+		(unsigned long)sdata->data;
 }
 
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	int ret;
+
 	if (!task)
 		return -EINVAL;
 
+	if (!bpf_task_storage_trylock())
+		return -EBUSY;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
 	 */
-	return task_storage_delete(task, map);
+	ret = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
+	return ret;
 }
 
 static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -276,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 
 static int task_storage_map_btf_id;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b1a76fe046cb..369faeddf1df 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -173,7 +173,7 @@
 #define BITS_ROUNDUP_BYTES(bits) \
 	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
 
-#define BTF_INFO_MASK 0x8f00ffff
+#define BTF_INFO_MASK 0x9f00ffff
 #define BTF_INT_MASK 0x0fffffff
 #define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
 #define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
@@ -280,6 +280,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 static const char *btf_type_str(const struct btf_type *t)
@@ -574,6 +575,7 @@ static bool btf_type_has_size(const struct btf_type *t)
 	case BTF_KIND_UNION:
 	case BTF_KIND_ENUM:
 	case BTF_KIND_DATASEC:
+	case BTF_KIND_FLOAT:
 		return true;
 	}
 
@@ -1704,6 +1706,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
 		case BTF_KIND_ENUM:
+		case BTF_KIND_FLOAT:
 			size = type->size;
 			goto resolved;
 
@@ -1849,7 +1852,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env,
 	return -EINVAL;
 }
 
-/* Used for ptr, array and struct/union type members.
+/* Used for ptr, array struct/union and float type members.
  * int, enum and modifier types have their specific callback functions.
  */
 static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
@@ -3675,6 +3678,81 @@ static const struct btf_kind_operations datasec_ops = {
 	.show			= btf_datasec_show,
 };
 
+static s32 btf_float_check_meta(struct btf_verifier_env *env,
+				const struct btf_type *t,
+				u32 meta_left)
+{
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	if (btf_type_kflag(t)) {
+		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+		return -EINVAL;
+	}
+
+	if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
+	    t->size != 16) {
+		btf_verifier_log_type(env, t, "Invalid type_size");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return 0;
+}
+
+static int btf_float_check_member(struct btf_verifier_env *env,
+				  const struct btf_type *struct_type,
+				  const struct btf_member *member,
+				  const struct btf_type *member_type)
+{
+	u64 start_offset_bytes;
+	u64 end_offset_bytes;
+	u64 misalign_bits;
+	u64 align_bytes;
+	u64 align_bits;
+
+	/* Different architectures have different alignment requirements, so
+	 * here we check only for the reasonable minimum. This way we ensure
+	 * that types after CO-RE can pass the kernel BTF verifier.
+	 */
+	align_bytes = min_t(u64, sizeof(void *), member_type->size);
+	align_bits = align_bytes * BITS_PER_BYTE;
+	div64_u64_rem(member->offset, align_bits, &misalign_bits);
+	if (misalign_bits) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not properly aligned");
+		return -EINVAL;
+	}
+
+	start_offset_bytes = member->offset / BITS_PER_BYTE;
+	end_offset_bytes = start_offset_bytes + member_type->size;
+	if (end_offset_bytes > struct_type->size) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void btf_float_log(struct btf_verifier_env *env,
+			  const struct btf_type *t)
+{
+	btf_verifier_log(env, "size=%u", t->size);
+}
+
+static const struct btf_kind_operations float_ops = {
+	.check_meta = btf_float_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_float_check_member,
+	.check_kflag_member = btf_generic_check_kflag_member,
+	.log_details = btf_float_log,
+	.show = btf_df_show,
+};
+
 static int btf_func_proto_check(struct btf_verifier_env *env,
 				const struct btf_type *t)
 {
@@ -3808,6 +3886,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
 	[BTF_KIND_VAR] = &var_ops,
 	[BTF_KIND_DATASEC] = &datasec_ops,
+	[BTF_KIND_FLOAT] = &float_ops,
 };
 
 static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4592,8 +4671,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	}
 	arg = off / 8;
 	args = (const struct btf_param *)(t + 1);
-	/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
-	nr_args = t ? btf_type_vlen(t) : 5;
+	/* if (t == NULL) Fall back to default BPF prog with
+	 * MAX_BPF_FUNC_REG_ARGS u64 arguments.
+	 */
+	nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
 	if (prog->aux->attach_btf_trace) {
 		/* skip first 'void *__data' argument in btf_trace_##name typedef */
 		args++;
@@ -4649,7 +4730,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		}
 	} else {
 		if (!t)
-			/* Default prog with 5 args */
+			/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
 			return true;
 		t = btf_type_by_id(btf, args[arg].type);
 	}
@@ -5100,12 +5181,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 
 	if (!func) {
 		/* BTF function prototype doesn't match the verifier types.
-		 * Fall back to 5 u64 args.
+		 * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
 		 */
-		for (i = 0; i < 5; i++)
+		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
 			m->arg_size[i] = 8;
 		m->ret_size = 8;
-		m->nr_args = 5;
+		m->nr_args = MAX_BPF_FUNC_REG_ARGS;
 		return 0;
 	}
 	args = (const struct btf_param *)(func + 1);
@@ -5328,8 +5409,9 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
 	}
 	args = (const struct btf_param *)(t + 1);
 	nargs = btf_type_vlen(t);
-	if (nargs > 5) {
-		bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
+	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+		bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
+			MAX_BPF_FUNC_REG_ARGS);
 		goto out;
 	}
 
@@ -5458,9 +5540,9 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 	}
 	args = (const struct btf_param *)(t + 1);
 	nargs = btf_type_vlen(t);
-	if (nargs > 5) {
-		bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
-			tname, nargs);
+	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+		bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
+			tname, nargs, MAX_BPF_FUNC_REG_ARGS);
 		return -EINVAL;
 	}
 	/* check that function returns int */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 5d1469de6921..0cf2791d5099 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
 	 * complete.
 	 */
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* For cpu_map the remote CPUs can still be using the entries
@@ -563,7 +562,7 @@ static void cpu_map_free(struct bpf_map *map)
 	kfree(cmap);
 }
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 	struct bpf_cpu_map_entry *rcpu;
@@ -600,6 +599,11 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+}
+
 static int cpu_map_btf_id;
 const struct bpf_map_ops cpu_map_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -612,6 +616,7 @@ const struct bpf_map_ops cpu_map_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_cpu_map",
 	.map_btf_id		= &cpu_map_btf_id,
+	.map_redirect		= cpu_map_redirect,
 };
 
 static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 85d9d1b72a33..7a5ad7331c3b 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* Make sure prior __dev_map_entry_free() have completed. */
@@ -258,7 +257,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct hlist_head *head = dev_map_index_hash(dtab, key);
@@ -392,7 +391,7 @@ void __dev_flush(void)
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
  */
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *obj;
@@ -735,6 +734,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
 					 map, key, value, map_flags);
 }
 
+static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+}
+
+static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+}
+
 static int dev_map_btf_id;
 const struct bpf_map_ops dev_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
@@ -747,6 +756,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "bpf_dtab",
 	.map_btf_id = &dev_map_btf_id,
+	.map_redirect = dev_map_redirect,
 };
 
 static int dev_map_hash_map_btf_id;
@@ -761,6 +771,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "bpf_dtab",
 	.map_btf_id = &dev_map_hash_map_btf_id,
+	.map_redirect = dev_hash_map_redirect,
 };
 
 static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d63912e73ad9..330d721dd2af 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_hash_map_info),
 };
 
+static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
+				  void *callback_ctx, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	struct htab_elem *elem;
+	u32 roundup_key_size;
+	int i, num_elems = 0;
+	void __percpu *pptr;
+	struct bucket *b;
+	void *key, *val;
+	bool is_percpu;
+	u64 ret = 0;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	is_percpu = htab_is_percpu(htab);
+
+	roundup_key_size = round_up(map->key_size, 8);
+	/* disable migration so percpu value prepared here will be the
+	 * same as the one seen by the bpf program with bpf_map_lookup_elem().
+	 */
+	if (is_percpu)
+		migrate_disable();
+	for (i = 0; i < htab->n_buckets; i++) {
+		b = &htab->buckets[i];
+		rcu_read_lock();
+		head = &b->head;
+		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+			key = elem->key;
+			if (is_percpu) {
+				/* current cpu value for percpu map */
+				pptr = htab_elem_get_ptr(elem, map->key_size);
+				val = this_cpu_ptr(pptr);
+			} else {
+				val = elem->key + roundup_key_size;
+			}
+			num_elems++;
+			ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
+					(u64)(long)key, (u64)(long)val,
+					(u64)(long)callback_ctx, 0);
+			/* return value: 0 - continue, 1 - stop and return */
+			if (ret) {
+				rcu_read_unlock();
+				goto out;
+			}
+		}
+		rcu_read_unlock();
+	}
+out:
+	if (is_percpu)
+		migrate_enable();
+	return num_elems;
+}
+
 static int htab_map_btf_id;
 const struct bpf_map_ops htab_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
@@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_map_btf_id,
@@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_lru),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_lru_map_btf_id,
@@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_percpu),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_percpu_map_btf_id,
@@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_lru_percpu),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_lru_percpu_map_btf_id,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 308427fe03a3..074800226327 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_discard_proto;
 	case BPF_FUNC_ringbuf_query:
 		return &bpf_ringbuf_query_proto;
+	case BPF_FUNC_for_each_map_elem:
+		return &bpf_for_each_map_elem_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 44e4ec1640f1..999bf36ffeb1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_CALL;
 }
 
+static bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
 	u32 btf_id;
 	struct btf *ret_btf;
 	u32 ret_btf_id;
+	u32 subprogno;
 };
 
 struct btf *btf_vmlinux;
@@ -390,6 +397,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 	env->prev_linfo = linfo;
 }
 
+static void verbose_invalid_scalar(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *reg,
+				   struct tnum *range, const char *ctx,
+				   const char *reg_name)
+{
+	char tn_buf[48];
+
+	verbose(env, "At %s the register %s ", ctx, reg_name);
+	if (!tnum_is_unknown(reg->var_off)) {
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose(env, "has value %s", tn_buf);
+	} else {
+		verbose(env, "has unknown scalar value");
+	}
+	tnum_strn(tn_buf, sizeof(tn_buf), *range);
+	verbose(env, " should have been in %s\n", tn_buf);
+}
+
 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 {
 	return type == PTR_TO_PACKET ||
@@ -409,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
 	return type == PTR_TO_SOCKET ||
 		type == PTR_TO_TCP_SOCK ||
 		type == PTR_TO_MAP_VALUE ||
+		type == PTR_TO_MAP_KEY ||
 		type == PTR_TO_SOCK_COMMON;
 }
 
@@ -451,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
 	       type == ARG_PTR_TO_MEM_OR_NULL ||
 	       type == ARG_PTR_TO_CTX_OR_NULL ||
 	       type == ARG_PTR_TO_SOCKET_OR_NULL ||
-	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
+	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
+	       type == ARG_PTR_TO_STACK_OR_NULL;
 }
 
 /* Determine whether the function releases some resources allocated by another
@@ -541,6 +568,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
 	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
+	[PTR_TO_FUNC]		= "func",
+	[PTR_TO_MAP_KEY]	= "map_key",
 };
 
 static char slot_type_char[] = {
@@ -612,6 +641,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			if (type_is_pkt_pointer(t))
 				verbose(env, ",r=%d", reg->range);
 			else if (t == CONST_PTR_TO_MAP ||
+				 t == PTR_TO_MAP_KEY ||
 				 t == PTR_TO_MAP_VALUE ||
 				 t == PTR_TO_MAP_VALUE_OR_NULL)
 				verbose(env, ",ks=%d,vs=%d",
@@ -1519,7 +1549,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	}
 	ret = find_subprog(env, off);
 	if (ret >= 0)
-		return 0;
+		return ret;
 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
 		verbose(env, "too many subprograms\n");
 		return -E2BIG;
@@ -1527,7 +1557,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	env->subprog_info[env->subprog_cnt++].start = off;
 	sort(env->subprog_info, env->subprog_cnt,
 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
-	return 0;
+	return env->subprog_cnt - 1;
 }
 
 static int check_subprogs(struct bpf_verifier_env *env)
@@ -1544,6 +1574,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
 
 	/* determine subprog starts. The end is one before the next starts */
 	for (i = 0; i < insn_cnt; i++) {
+		if (bpf_pseudo_func(insn + i)) {
+			if (!env->bpf_capable) {
+				verbose(env,
+					"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
+				return -EPERM;
+			}
+			ret = add_subprog(env, i + insn[i].imm + 1);
+			if (ret < 0)
+				return ret;
+			/* remember subprog */
+			insn[i + 1].imm = ret;
+			continue;
+		}
 		if (!bpf_pseudo_call(insn + i))
 			continue;
 		if (!env->bpf_capable) {
@@ -2295,6 +2338,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PERCPU_BTF_ID:
 	case PTR_TO_MEM:
 	case PTR_TO_MEM_OR_NULL:
+	case PTR_TO_FUNC:
+	case PTR_TO_MAP_KEY:
 		return true;
 	default:
 		return false;
@@ -2899,6 +2944,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
 
 	reg = &cur_regs(env)[regno];
 	switch (reg->type) {
+	case PTR_TO_MAP_KEY:
+		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
+			mem_size, off, size);
+		break;
 	case PTR_TO_MAP_VALUE:
 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
 			mem_size, off, size);
@@ -3304,6 +3353,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_FLOW_KEYS:
 		pointer_desc = "flow keys ";
 		break;
+	case PTR_TO_MAP_KEY:
+		pointer_desc = "key ";
+		break;
 	case PTR_TO_MAP_VALUE:
 		pointer_desc = "value ";
 		break;
@@ -3405,7 +3457,7 @@ process_func:
 continue_func:
 	subprog_end = subprog[idx + 1].start;
 	for (; i < subprog_end; i++) {
-		if (!bpf_pseudo_call(insn + i))
+		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
 			continue;
 		/* remember insn and function to return to */
 		ret_insn[frame] = i + 1;
@@ -3842,7 +3894,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	/* for access checks, reg->off is just part of off */
 	off += reg->off;
 
-	if (reg->type == PTR_TO_MAP_VALUE) {
+	if (reg->type == PTR_TO_MAP_KEY) {
+		if (t == BPF_WRITE) {
+			verbose(env, "write to change key R%d not allowed\n", regno);
+			return -EACCES;
+		}
+
+		err = check_mem_region_access(env, regno, off, size,
+					      reg->map_ptr->key_size, false);
+		if (err)
+			return err;
+		if (value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose(env, "R%d leaks addr into map\n", value_regno);
@@ -4258,6 +4322,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	case PTR_TO_PACKET_META:
 		return check_packet_access(env, regno, reg->off, access_size,
 					   zero_size_allowed);
+	case PTR_TO_MAP_KEY:
+		return check_mem_region_access(env, regno, reg->off, access_size,
+					       reg->map_ptr->key_size, false);
 	case PTR_TO_MAP_VALUE:
 		if (check_map_access_type(env, regno, reg->off, access_size,
 					  meta && meta->raw_mode ? BPF_WRITE :
@@ -4474,6 +4541,7 @@ static const struct bpf_reg_types map_key_value_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 	},
 };
@@ -4505,6 +4573,7 @@ static const struct bpf_reg_types mem_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 		PTR_TO_MEM,
 		PTR_TO_RDONLY_BUF,
@@ -4517,6 +4586,7 @@ static const struct bpf_reg_types int_ptr_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 	},
 };
@@ -4529,6 +4599,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
+static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
+static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4557,6 +4629,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_INT]		= &int_ptr_types,
 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
+	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
+	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4738,6 +4812,8 @@ skip_type_check:
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
 		}
+	} else if (arg_type == ARG_PTR_TO_FUNC) {
+		meta->subprogno = reg->subprogno;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		/* The access to this pointer is only checked when we hit the
 		 * next is_mem_size argument below.
@@ -5258,13 +5334,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 	}
 }
 
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-			   int *insn_idx)
+typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee,
+				   int insn_idx);
+
+static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			     int *insn_idx, int subprog,
+			     set_callee_state_fn set_callee_state_cb)
 {
 	struct bpf_verifier_state *state = env->cur_state;
 	struct bpf_func_info_aux *func_info_aux;
 	struct bpf_func_state *caller, *callee;
-	int i, err, subprog, target_insn;
+	int err;
 	bool is_global = false;
 
 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
@@ -5273,14 +5355,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -E2BIG;
 	}
 
-	target_insn = *insn_idx + insn->imm;
-	subprog = find_subprog(env, target_insn + 1);
-	if (subprog < 0) {
-		verbose(env, "verifier bug. No program starts at insn %d\n",
-			target_insn + 1);
-		return -EFAULT;
-	}
-
 	caller = state->frame[state->curframe];
 	if (state->frame[state->curframe + 1]) {
 		verbose(env, "verifier bug. Frame %d already allocated\n",
@@ -5335,11 +5409,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	if (err)
 		return err;
 
-	/* copy r1 - r5 args that callee can access.  The copy includes parent
-	 * pointers, which connects us up to the liveness chain
-	 */
-	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
-		callee->regs[i] = caller->regs[i];
+	err = set_callee_state_cb(env, caller, callee, *insn_idx);
+	if (err)
+		return err;
 
 	clear_caller_saved_regs(env, caller->regs);
 
@@ -5347,7 +5419,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	state->curframe++;
 
 	/* and go analyze first insn of the callee */
-	*insn_idx = target_insn;
+	*insn_idx = env->subprog_info[subprog].start - 1;
 
 	if (env->log.level & BPF_LOG_LEVEL) {
 		verbose(env, "caller:\n");
@@ -5358,6 +5430,92 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee)
+{
+	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
+	 *      void *callback_ctx, u64 flags);
+	 * callback_fn(struct bpf_map *map, void *key, void *value,
+	 *      void *callback_ctx);
+	 */
+	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
+
+	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+	/* pointer to stack or null */
+	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	return 0;
+}
+
+static int set_callee_state(struct bpf_verifier_env *env,
+			    struct bpf_func_state *caller,
+			    struct bpf_func_state *callee, int insn_idx)
+{
+	int i;
+
+	/* copy r1 - r5 args that callee can access.  The copy includes parent
+	 * pointers, which connects us up to the liveness chain
+	 */
+	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+		callee->regs[i] = caller->regs[i];
+	return 0;
+}
+
+static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			   int *insn_idx)
+{
+	int subprog, target_insn;
+
+	target_insn = *insn_idx + insn->imm + 1;
+	subprog = find_subprog(env, target_insn);
+	if (subprog < 0) {
+		verbose(env, "verifier bug. No program starts at insn %d\n",
+			target_insn);
+		return -EFAULT;
+	}
+
+	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
+}
+
+static int set_map_elem_callback_state(struct bpf_verifier_env *env,
+				       struct bpf_func_state *caller,
+				       struct bpf_func_state *callee,
+				       int insn_idx)
+{
+	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
+	struct bpf_map *map;
+	int err;
+
+	if (bpf_map_ptr_poisoned(insn_aux)) {
+		verbose(env, "tail_call abusing map_ptr\n");
+		return -EINVAL;
+	}
+
+	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
+	if (!map->ops->map_set_for_each_callback_args ||
+	    !map->ops->map_for_each_callback) {
+		verbose(env, "callback function not allowed for map\n");
+		return -ENOTSUPP;
+	}
+
+	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
+	if (err)
+		return err;
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -5380,8 +5538,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 
 	state->curframe--;
 	caller = state->frame[state->curframe];
-	/* return to the caller whatever r0 had in the callee */
-	caller->regs[BPF_REG_0] = *r0;
+	if (callee->in_callback_fn) {
+		/* enforce R0 return value range [0, 1]. */
+		struct tnum range = tnum_range(0, 1);
+
+		if (r0->type != SCALAR_VALUE) {
+			verbose(env, "R0 not a scalar value\n");
+			return -EACCES;
+		}
+		if (!tnum_in(range, r0->var_off)) {
+			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
+			return -EINVAL;
+		}
+	} else {
+		/* return to the caller whatever r0 had in the callee */
+		caller->regs[BPF_REG_0] = *r0;
+	}
 
 	/* Transfer references to the caller */
 	err = transfer_reference_state(caller, callee);
@@ -5436,7 +5608,9 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_delete_elem &&
 	    func_id != BPF_FUNC_map_push_elem &&
 	    func_id != BPF_FUNC_map_pop_elem &&
-	    func_id != BPF_FUNC_map_peek_elem)
+	    func_id != BPF_FUNC_map_peek_elem &&
+	    func_id != BPF_FUNC_for_each_map_elem &&
+	    func_id != BPF_FUNC_redirect_map)
 		return 0;
 
 	if (map == NULL) {
@@ -5517,15 +5691,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
 	return state->acquired_refs ? -EINVAL : 0;
 }
 
-static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
+static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			     int *insn_idx_p)
 {
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs;
 	struct bpf_call_arg_meta meta;
+	int insn_idx = *insn_idx_p;
 	bool changes_data;
-	int i, err;
+	int i, err, func_id;
 
 	/* find function prototype */
+	func_id = insn->imm;
 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
 			func_id);
@@ -5571,7 +5748,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 	meta.func_id = func_id;
 	/* check args */
-	for (i = 0; i < 5; i++) {
+	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 		err = check_func_arg(env, i, &meta, fn);
 		if (err)
 			return err;
@@ -5621,6 +5798,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+	if (func_id == BPF_FUNC_for_each_map_elem) {
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_map_elem_callback_state);
+		if (err < 0)
+			return -EINVAL;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
@@ -5878,6 +6062,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 		else
 			*ptr_limit = -off - 1;
 		return *ptr_limit >= max ? -ERANGE : 0;
+	case PTR_TO_MAP_KEY:
+		/* Currently, this code is not exercised as the only use
+		 * is bpf_for_each_map_elem() helper which requires
+		 * bpf_capble. The code has been tested manually for
+		 * future use.
+		 */
+		if (mask_to_left) {
+			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
+		} else {
+			off = ptr_reg->smin_value + ptr_reg->off;
+			*ptr_limit = ptr_reg->map_ptr->key_size - off;
+		}
+		return 0;
 	case PTR_TO_MAP_VALUE:
 		max = ptr_reg->map_ptr->value_size;
 		if (mask_to_left) {
@@ -5909,7 +6106,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
 	     aux->alu_limit != alu_limit))
 		return -EACCES;
 
-	/* Corresponding fixup done in fixup_bpf_calls(). */
+	/* Corresponding fixup done in do_misc_fixups(). */
 	aux->alu_state = alu_state;
 	aux->alu_limit = alu_limit;
 	return 0;
@@ -6084,6 +6281,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
+	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
 			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
@@ -8263,6 +8461,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return 0;
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_FUNC) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		u32 subprogno = insn[1].imm;
+
+		if (!aux->func_info) {
+			verbose(env, "missing btf func_info\n");
+			return -EINVAL;
+		}
+		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
+			verbose(env, "callback function not static\n");
+			return -EINVAL;
+		}
+
+		dst_reg->type = PTR_TO_FUNC;
+		dst_reg->subprogno = subprogno;
+		return 0;
+	}
+
 	map = env->used_maps[aux->map_index];
 	mark_reg_known_zero(env, regs, insn->dst_reg);
 	dst_reg->map_ptr = map;
@@ -8491,17 +8707,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	}
 
 	if (!tnum_in(range, reg->var_off)) {
-		char tn_buf[48];
-
-		verbose(env, "At program exit the register R0 ");
-		if (!tnum_is_unknown(reg->var_off)) {
-			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-			verbose(env, "has value %s", tn_buf);
-		} else {
-			verbose(env, "has unknown scalar value");
-		}
-		tnum_strn(tn_buf, sizeof(tn_buf), range);
-		verbose(env, " should have been in %s\n", tn_buf);
+		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
 		return -EINVAL;
 	}
 
@@ -8628,6 +8834,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
 	return DONE_EXPLORING;
 }
 
+static int visit_func_call_insn(int t, int insn_cnt,
+				struct bpf_insn *insns,
+				struct bpf_verifier_env *env,
+				bool visit_callee)
+{
+	int ret;
+
+	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+	if (ret)
+		return ret;
+
+	if (t + 1 < insn_cnt)
+		init_explored_state(env, t + 1);
+	if (visit_callee) {
+		init_explored_state(env, t);
+		ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
+				env, false);
+	}
+	return ret;
+}
+
 /* Visits the instruction at index t and returns one of the following:
  *  < 0 - an error occurred
  *  DONE_EXPLORING - the instruction was fully explored
@@ -8638,6 +8865,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
 	struct bpf_insn *insns = env->prog->insnsi;
 	int ret;
 
+	if (bpf_pseudo_func(insns + t))
+		return visit_func_call_insn(t, insn_cnt, insns, env, true);
+
 	/* All non-branch instructions have a single fall-through edge. */
 	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
 	    BPF_CLASS(insns[t].code) != BPF_JMP32)
@@ -8648,18 +8878,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
 		return DONE_EXPLORING;
 
 	case BPF_CALL:
-		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
-		if (ret)
-			return ret;
-
-		if (t + 1 < insn_cnt)
-			init_explored_state(env, t + 1);
-		if (insns[t].src_reg == BPF_PSEUDO_CALL) {
-			init_explored_state(env, t);
-			ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
-					env, false);
-		}
-		return ret;
+		return visit_func_call_insn(t, insn_cnt, insns, env,
+					    insns[t].src_reg == BPF_PSEUDO_CALL);
 
 	case BPF_JA:
 		if (BPF_SRC(insns[t].code) != BPF_K)
@@ -9272,6 +9492,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 			 */
 			return false;
 		}
+	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
 		 * everything else matches, we are OK.
@@ -10118,10 +10339,9 @@ static int do_check(struct bpf_verifier_env *env)
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
 				else
-					err = check_helper_call(env, insn->imm, env->insn_idx);
+					err = check_helper_call(env, insn, &env->insn_idx);
 				if (err)
 					return err;
-
 			} else if (opcode == BPF_JA) {
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
@@ -10550,6 +10770,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 				goto next_insn;
 			}
 
+			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
+				aux = &env->insn_aux_data[i];
+				aux->ptr_type = PTR_TO_FUNC;
+				goto next_insn;
+			}
+
 			/* In final convert_pseudo_ld_imm64() step, this is
 			 * converted into regular 64-bit imm load insn.
 			 */
@@ -10682,9 +10908,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 	int insn_cnt = env->prog->len;
 	int i;
 
-	for (i = 0; i < insn_cnt; i++, insn++)
-		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
-			insn->src_reg = 0;
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
+			continue;
+		if (insn->src_reg == BPF_PSEUDO_FUNC)
+			continue;
+		insn->src_reg = 0;
+	}
 }
 
 /* single env->prog->insni[off] instruction was replaced with the range
@@ -11323,6 +11553,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			env->insn_aux_data[i].call_imm = insn->imm;
+			/* subprog is encoded in insn[1].imm */
+			continue;
+		}
+
 		if (!bpf_pseudo_call(insn))
 			continue;
 		/* Upon error here we cannot fall back to interpreter but
@@ -11452,6 +11688,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	for (i = 0; i < env->subprog_cnt; i++) {
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
+			if (bpf_pseudo_func(insn)) {
+				subprog = insn[1].imm;
+				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
+				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
+				continue;
+			}
 			if (!bpf_pseudo_call(insn))
 				continue;
 			subprog = insn->off;
@@ -11497,6 +11739,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * later look the same as if they were interpreted only.
 	 */
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			insn[0].imm = env->insn_aux_data[i].call_imm;
+			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			continue;
+		}
 		if (!bpf_pseudo_call(insn))
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
@@ -11561,6 +11808,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 		return -EINVAL;
 	}
 	for (i = 0; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			/* When JIT fails the progs with callback calls
+			 * have to be rejected, since interpreter doesn't support them yet.
+			 */
+			verbose(env, "callbacks are not allowed in non-JITed programs\n");
+			return -EINVAL;
+		}
+
 		if (!bpf_pseudo_call(insn))
 			continue;
 		depth = get_callee_stack_depth(env, insn, i);
@@ -11573,12 +11828,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 	return err;
 }
 
-/* fixup insn->imm field of bpf_call instructions
- * and inline eligible helpers as explicit sequence of BPF instructions
- *
- * this function is called after eBPF program passed verification
+/* Do various post-verification rewrites in a single program pass.
+ * These rewrites simplify JIT and interpreter implementations.
  */
-static int fixup_bpf_calls(struct bpf_verifier_env *env)
+static int do_misc_fixups(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
@@ -11593,6 +11846,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	int i, ret, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
@@ -11633,6 +11887,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
 		if (BPF_CLASS(insn->code) == BPF_LD &&
 		    (BPF_MODE(insn->code) == BPF_ABS ||
 		     BPF_MODE(insn->code) == BPF_IND)) {
@@ -11652,6 +11907,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
@@ -11800,7 +12056,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		     insn->imm == BPF_FUNC_map_delete_elem ||
 		     insn->imm == BPF_FUNC_map_push_elem   ||
 		     insn->imm == BPF_FUNC_map_pop_elem    ||
-		     insn->imm == BPF_FUNC_map_peek_elem)) {
+		     insn->imm == BPF_FUNC_map_peek_elem   ||
+		     insn->imm == BPF_FUNC_redirect_map)) {
 			aux = &env->insn_aux_data[i + delta];
 			if (bpf_map_ptr_poisoned(aux))
 				goto patch_call_imm;
@@ -11842,6 +12099,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 				     (int (*)(struct bpf_map *map, void *value))NULL));
 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
 				     (int (*)(struct bpf_map *map, void *value))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_redirect,
+				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
+
 patch_map_ops_generic:
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
@@ -11868,11 +12128,16 @@ patch_map_ops_generic:
 				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
 					    __bpf_call_base;
 				continue;
+			case BPF_FUNC_redirect_map:
+				insn->imm = BPF_CAST_CALL(ops->map_redirect) -
+					    __bpf_call_base;
+				continue;
 			}
 
 			goto patch_call_imm;
 		}
 
+		/* Implement bpf_jiffies64 inline. */
 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
 		    insn->imm == BPF_FUNC_jiffies64) {
 			struct bpf_insn ld_jiffies_addr[2] = {
@@ -12683,7 +12948,7 @@ skip_full_check:
 		ret = convert_ctx_accesses(env);
 
 	if (ret == 0)
-		ret = fixup_bpf_calls(env);
+		ret = do_misc_fixups(env);
 
 	/* do 32-bit optimization after insn patching has done so those patched
 	 * insns could be handled correctly.
diff --git a/kernel/fork.c b/kernel/fork.c
index 54cc905e5fe0..50209691f21a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
+	bpf_task_storage_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -2072,6 +2074,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b0c45d923f0f..0d23755c2747 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1367,6 +1367,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
+	case BPF_FUNC_for_each_map_elem:
+		return &bpf_for_each_map_elem_proto;
 	default:
 		return NULL;
 	}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 76c607ee6db5..5a1dd4736b56 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -487,6 +487,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 	struct rhashtable *ht;
 	const struct bucket_table *tbl;
 	char buff[512] = "";
+	int offset = 0;
 	unsigned int i, cnt = 0;
 
 	ht = &rhlt->ht;
@@ -501,18 +502,18 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 		next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
 
 		if (!rht_is_a_nulls(pos)) {
-			sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
+			offset += sprintf(buff + offset, "\nbucket[%d] -> ", i);
 		}
 
 		while (!rht_is_a_nulls(pos)) {
 			struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
-			sprintf(buff, "%s[[", buff);
+			offset += sprintf(buff + offset, "[[");
 			do {
 				pos = &list->rhead;
 				list = rht_dereference(list->next, ht);
 				p = rht_obj(ht, pos);
 
-				sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid,
+				offset += sprintf(buff + offset, " val %d (tid=%d)%s", p->value.id, p->value.tid,
 					list? ", " : " ");
 				cnt++;
 			} while (list);
@@ -521,7 +522,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 			next = !rht_is_a_nulls(pos) ?
 				rht_dereference(pos->next, ht) : NULL;
 
-			sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : "");
+			offset += sprintf(buff + offset, "]]%s", !rht_is_a_nulls(pos) ? " -> " : "");
 		}
 	}
 	printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 8a3507524f7b..33f17bd8cda7 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Alexander Aring	<aar@pengutronix.de>
  *
- *	Orignal written by:
+ *	Original written by:
  *	Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
  *	Jon Smirl <jonsmirl@gmail.com>
  */
@@ -82,7 +82,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
 	if (fail)
 		return -EINVAL;
 
-	/* UDP length needs to be infered from the lower layers
+	/* UDP length needs to be inferred from the lower layers
 	 * here, we obtain the hint from the remaining size of the
 	 * frame
 	 */
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 78ec2e1b14d1..59bc13b5f14f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,6 +4,7 @@
 #include <linux/if_vlan.h>
 #include <linux/netpoll.h>
 #include <linux/export.h>
+#include <net/gro.h>
 #include "vlan.h"
 
 bool vlan_do_receive(struct sk_buff **skbp)
@@ -495,7 +496,10 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
 
 	skb_gro_pull(skb, sizeof(*vhdr));
 	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+					    ipv6_gro_receive, inet_gro_receive,
+					    head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -515,7 +519,9 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
 	rcu_read_lock();
 	ptype = gro_find_complete_by_type(type);
 	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, nhoff + sizeof(*vhdr));
 
 	rcu_read_unlock();
 	return err;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index dc1a197792e6..4db3f0621959 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -776,6 +776,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
 	return real_dev->ifindex;
 }
 
+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
+				      struct net_device_path *path)
+{
+	struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
+
+	path->type = DEV_PATH_VLAN;
+	path->encap.id = vlan->vlan_id;
+	path->encap.proto = vlan->vlan_proto;
+	path->dev = ctx->dev;
+	ctx->dev = vlan->real_dev;
+	if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+		return -ENOSPC;
+
+	ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
+	ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
+	ctx->num_vlans++;
+
+	return 0;
+}
+
 static const struct ethtool_ops vlan_ethtool_ops = {
 	.get_link_ksettings	= vlan_ethtool_get_link_ksettings,
 	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
@@ -814,6 +834,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 #endif
 	.ndo_fix_features	= vlan_dev_fix_features,
 	.ndo_get_iflink		= vlan_dev_get_iflink,
+	.ndo_fill_forward_path	= vlan_dev_fill_forward_path,
 };
 
 static void vlan_dev_free(struct net_device *dev)
diff --git a/net/Kconfig b/net/Kconfig
index 8cea808ad9e8..9c456acc379e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -245,6 +245,14 @@ source "net/l3mdev/Kconfig"
 source "net/qrtr/Kconfig"
 source "net/ncsi/Kconfig"
 
+config PCPU_DEV_REFCNT
+	bool "Use percpu variables to maintain network device refcount"
+	depends on SMP
+	default y
+	help
+	  network device refcount are using per cpu variables if this option is set.
+	  This can be forced to N to detect underflows (with a performance drop).
+
 config RPS
 	bool
 	depends on SMP && SYSFS
@@ -317,13 +325,9 @@ config BPF_STREAM_PARSER
 	select STREAM_PARSER
 	select NET_SOCK_MSG
 	help
-	  Enabling this allows a stream parser to be used with
+	  Enabling this allows a TCP stream parser to be used with
 	  BPF_MAP_TYPE_SOCKMAP.
 
-	  BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
-	  It can be used to enforce socket policy, implement socket redirects,
-	  etc.
-
 config NET_FLOW_LIMIT
 	bool
 	depends on RPS
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 269ee89d2c2b..2631efc6e359 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -850,6 +850,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 		case AX25_P_ROSE:
 			if (ax25_protocol_is_registered(AX25_P_ROSE))
 				return -ESOCKTNOSUPPORT;
+			break;
 #endif
 		default:
 			break;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 360bdbf44748..bcd543ce835b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -438,10 +438,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
 	batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
 			   skb->len + ETH_HLEN);
 
-	if (in_interrupt())
-		netif_rx(skb);
-	else
-		netif_rx_ni(skb);
+	netif_rx_any_context(skb);
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 58bcb8c849d5..0abdd67f44b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,20 +10,86 @@
 #include <net/bpf_sk_storage.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/net_namespace.h>
 #include <linux/error-injection.h>
 #include <linux/smp.h>
+#include <linux/sock_diag.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
 
+struct bpf_test_timer {
+	enum { NO_PREEMPT, NO_MIGRATE } mode;
+	u32 i;
+	u64 time_start, time_spent;
+};
+
+static void bpf_test_timer_enter(struct bpf_test_timer *t)
+	__acquires(rcu)
+{
+	rcu_read_lock();
+	if (t->mode == NO_PREEMPT)
+		preempt_disable();
+	else
+		migrate_disable();
+
+	t->time_start = ktime_get_ns();
+}
+
+static void bpf_test_timer_leave(struct bpf_test_timer *t)
+	__releases(rcu)
+{
+	t->time_start = 0;
+
+	if (t->mode == NO_PREEMPT)
+		preempt_enable();
+	else
+		migrate_enable();
+	rcu_read_unlock();
+}
+
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+	__must_hold(rcu)
+{
+	t->i++;
+	if (t->i >= repeat) {
+		/* We're done. */
+		t->time_spent += ktime_get_ns() - t->time_start;
+		do_div(t->time_spent, t->i);
+		*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
+		*err = 0;
+		goto reset;
+	}
+
+	if (signal_pending(current)) {
+		/* During iteration: we've been cancelled, abort. */
+		*err = -EINTR;
+		goto reset;
+	}
+
+	if (need_resched()) {
+		/* During iteration: we need to reschedule between runs. */
+		t->time_spent += ktime_get_ns() - t->time_start;
+		bpf_test_timer_leave(t);
+		cond_resched();
+		bpf_test_timer_enter(t);
+	}
+
+	/* Do another round. */
+	return true;
+
+reset:
+	t->i = 0;
+	return false;
+}
+
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time, bool xdp)
 {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+	struct bpf_test_timer t = { NO_MIGRATE };
 	enum bpf_cgroup_storage_type stype;
-	u64 time_start, time_spent = 0;
-	int ret = 0;
-	u32 i;
+	int ret;
 
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -38,40 +104,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 	if (!repeat)
 		repeat = 1;
 
-	rcu_read_lock();
-	migrate_disable();
-	time_start = ktime_get_ns();
-	for (i = 0; i < repeat; i++) {
+	bpf_test_timer_enter(&t);
+	do {
 		bpf_cgroup_storage_set(storage);
 
 		if (xdp)
 			*retval = bpf_prog_run_xdp(prog, ctx);
 		else
 			*retval = BPF_PROG_RUN(prog, ctx);
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-
-		if (need_resched()) {
-			time_spent += ktime_get_ns() - time_start;
-			migrate_enable();
-			rcu_read_unlock();
-
-			cond_resched();
-
-			rcu_read_lock();
-			migrate_disable();
-			time_start = ktime_get_ns();
-		}
-	}
-	time_spent += ktime_get_ns() - time_start;
-	migrate_enable();
-	rcu_read_unlock();
-
-	do_div(time_spent, repeat);
-	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
+	bpf_test_timer_leave(&t);
 
 	for_each_cgroup_storage_type(stype)
 		bpf_cgroup_storage_free(storage[stype]);
@@ -674,18 +716,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 				     const union bpf_attr *kattr,
 				     union bpf_attr __user *uattr)
 {
+	struct bpf_test_timer t = { NO_PREEMPT };
 	u32 size = kattr->test.data_size_in;
 	struct bpf_flow_dissector ctx = {};
 	u32 repeat = kattr->test.repeat;
 	struct bpf_flow_keys *user_ctx;
 	struct bpf_flow_keys flow_keys;
-	u64 time_start, time_spent = 0;
 	const struct ethhdr *eth;
 	unsigned int flags = 0;
 	u32 retval, duration;
 	void *data;
 	int ret;
-	u32 i;
 
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
@@ -721,48 +762,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	ctx.data = data;
 	ctx.data_end = (__u8 *)data + size;
 
-	rcu_read_lock();
-	preempt_disable();
-	time_start = ktime_get_ns();
-	for (i = 0; i < repeat; i++) {
+	bpf_test_timer_enter(&t);
+	do {
 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
 					  size, flags);
+	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+	bpf_test_timer_leave(&t);
 
-		if (signal_pending(current)) {
-			preempt_enable();
-			rcu_read_unlock();
+	if (ret < 0)
+		goto out;
 
-			ret = -EINTR;
-			goto out;
-		}
+	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+			      retval, duration);
+	if (!ret)
+		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+				     sizeof(struct bpf_flow_keys));
 
-		if (need_resched()) {
-			time_spent += ktime_get_ns() - time_start;
-			preempt_enable();
-			rcu_read_unlock();
+out:
+	kfree(user_ctx);
+	kfree(data);
+	return ret;
+}
 
-			cond_resched();
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
+				union bpf_attr __user *uattr)
+{
+	struct bpf_test_timer t = { NO_PREEMPT };
+	struct bpf_prog_array *progs = NULL;
+	struct bpf_sk_lookup_kern ctx = {};
+	u32 repeat = kattr->test.repeat;
+	struct bpf_sk_lookup *user_ctx;
+	u32 retval, duration;
+	int ret = -EINVAL;
 
-			rcu_read_lock();
-			preempt_disable();
-			time_start = ktime_get_ns();
-		}
+	if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
+		return -EINVAL;
+
+	if (kattr->test.flags || kattr->test.cpu)
+		return -EINVAL;
+
+	if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
+	    kattr->test.data_size_out)
+		return -EINVAL;
+
+	if (!repeat)
+		repeat = 1;
+
+	user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
+	if (IS_ERR(user_ctx))
+		return PTR_ERR(user_ctx);
+
+	if (!user_ctx)
+		return -EINVAL;
+
+	if (user_ctx->sk)
+		goto out;
+
+	if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
+		goto out;
+
+	if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+		ret = -ERANGE;
+		goto out;
 	}
-	time_spent += ktime_get_ns() - time_start;
-	preempt_enable();
-	rcu_read_unlock();
 
-	do_div(time_spent, repeat);
-	duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+	ctx.family = (u16)user_ctx->family;
+	ctx.protocol = (u16)user_ctx->protocol;
+	ctx.dport = (u16)user_ctx->local_port;
+	ctx.sport = (__force __be16)user_ctx->remote_port;
 
-	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
-			      retval, duration);
+	switch (ctx.family) {
+	case AF_INET:
+		ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
+		ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
+		break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
+		ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
+		break;
+#endif
+
+	default:
+		ret = -EAFNOSUPPORT;
+		goto out;
+	}
+
+	progs = bpf_prog_array_alloc(1, GFP_KERNEL);
+	if (!progs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	progs->items[0].prog = prog;
+
+	bpf_test_timer_enter(&t);
+	do {
+		ctx.selected_sk = NULL;
+		retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+	bpf_test_timer_leave(&t);
+
+	if (ret < 0)
+		goto out;
+
+	user_ctx->cookie = 0;
+	if (ctx.selected_sk) {
+		if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+
+		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
+	}
+
+	ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
 	if (!ret)
-		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
-				     sizeof(struct bpf_flow_keys));
+		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
 out:
+	bpf_prog_array_free(progs);
 	kfree(user_ctx);
-	kfree(data);
 	return ret;
 }
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index dfec65eca8a6..3db1def4437b 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -160,7 +160,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
 	if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
 		if (p && (p->flags & BR_NEIGH_SUPPRESS))
 			return;
-		if (ipv4_is_zeronet(sip) || sip == tip) {
+		if (parp->ar_op != htons(ARPOP_RREQUEST) &&
+		    parp->ar_op != htons(ARPOP_RREPLY) &&
+		    (ipv4_is_zeronet(sip) || sip == tip)) {
 			/* prevent flooding to neigh suppress ports */
 			BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 			return;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3f2f06b4dd27..e8b626cc6bfd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -385,6 +385,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
 	return br_del_if(br, slave_dev);
 }
 
+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
+				struct net_device_path *path)
+{
+	struct net_bridge_fdb_entry *f;
+	struct net_bridge_port *dst;
+	struct net_bridge *br;
+
+	if (netif_is_bridge_port(ctx->dev))
+		return -1;
+
+	br = netdev_priv(ctx->dev);
+
+	br_vlan_fill_forward_path_pvid(br, ctx, path);
+
+	f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
+	if (!f || !f->dst)
+		return -1;
+
+	dst = READ_ONCE(f->dst);
+	if (!dst)
+		return -1;
+
+	if (br_vlan_fill_forward_path_mode(br, dst, path))
+		return -1;
+
+	path->type = DEV_PATH_BRIDGE;
+	path->dev = dst->br->dev;
+	ctx->dev = dst->dev;
+
+	switch (path->bridge.vlan_mode) {
+	case DEV_PATH_BR_VLAN_TAG:
+		if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+			return -ENOSPC;
+		ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
+		ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
+		ctx->num_vlans++;
+		break;
+	case DEV_PATH_BR_VLAN_UNTAG_HW:
+	case DEV_PATH_BR_VLAN_UNTAG:
+		ctx->num_vlans--;
+		break;
+	case DEV_PATH_BR_VLAN_KEEP:
+		break;
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo		 = br_getinfo,
 	.get_link		 = ethtool_op_get_link,
@@ -419,6 +467,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_bridge_setlink	 = br_setlink,
 	.ndo_bridge_dellink	 = br_dellink,
 	.ndo_features_check	 = passthru_features_check,
+	.ndo_fill_forward_path	 = br_fill_forward_path,
 };
 
 static struct device_type br_type = {
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b7490237f3fc..698b79747d32 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -726,6 +726,56 @@ static inline size_t fdb_nlmsg_size(void)
 		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
 }
 
+static int br_fdb_replay_one(struct notifier_block *nb,
+			     struct net_bridge_fdb_entry *fdb,
+			     struct net_device *dev)
+{
+	struct switchdev_notifier_fdb_info item;
+	int err;
+
+	item.addr = fdb->key.addr.addr;
+	item.vid = fdb->key.vlan_id;
+	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+	item.info.dev = dev;
+
+	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+	return notifier_to_errno(err);
+}
+
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb)
+{
+	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge *br;
+	int err = 0;
+
+	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+		struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+		struct net_device *dst_dev;
+
+		dst_dev = dst ? dst->dev : br->dev;
+		if (dst_dev != br_dev && dst_dev != dev)
+			continue;
+
+		err = br_fdb_replay_one(nb, fdb, dst_dev);
+		if (err)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_fdb_replay);
+
 static void fdb_notify(struct net_bridge *br,
 		       const struct net_bridge_fdb_entry *fdb, int type,
 		       bool swdev_notify)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 222285d9dae2..8875e953ac53 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -144,6 +144,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 		break;
 	case BR_PKT_UNICAST:
 		dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid);
+		break;
 	default:
 		break;
 	}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 8846c5bcd075..95fa4af0e8dd 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -506,6 +506,134 @@ err:
 	kfree(priv);
 }
 
+static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
+				      const struct net_bridge_mdb_entry *mp)
+{
+	if (mp->addr.proto == htons(ETH_P_IP))
+		ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (mp->addr.proto == htons(ETH_P_IPV6))
+		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
+#endif
+	else
+		ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
+
+	mdb->vid = mp->addr.vid;
+}
+
+static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
+			     struct switchdev_obj_port_mdb *mdb,
+			     struct netlink_ext_ack *extack)
+{
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+		},
+		.obj = &mdb->obj,
+	};
+	int err;
+
+	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	return notifier_to_errno(err);
+}
+
+static int br_mdb_queue_one(struct list_head *mdb_list,
+			    enum switchdev_obj_id id,
+			    const struct net_bridge_mdb_entry *mp,
+			    struct net_device *orig_dev)
+{
+	struct switchdev_obj_port_mdb *mdb;
+
+	mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
+	if (!mdb)
+		return -ENOMEM;
+
+	mdb->obj.id = id;
+	mdb->obj.orig_dev = orig_dev;
+	br_switchdev_mdb_populate(mdb, mp);
+	list_add_tail(&mdb->obj.list, mdb_list);
+
+	return 0;
+}
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb, struct netlink_ext_ack *extack)
+{
+	struct net_bridge_mdb_entry *mp;
+	struct switchdev_obj *obj, *tmp;
+	struct net_bridge *br;
+	LIST_HEAD(mdb_list);
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		return 0;
+
+	/* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
+	 * because the write-side protection is br->multicast_lock. But we
+	 * need to emulate the [ blocking ] calling context of a regular
+	 * switchdev event, so since both br->multicast_lock and RCU read side
+	 * critical sections are atomic, we have no choice but to pick the RCU
+	 * read side lock, queue up all our events, leave the critical section
+	 * and notify switchdev from blocking context.
+	 */
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+		struct net_bridge_port_group __rcu **pp;
+		struct net_bridge_port_group *p;
+
+		if (mp->host_joined) {
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_HOST_MDB,
+					       mp, br_dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+
+		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+		     pp = &p->next) {
+			if (p->key.port->dev != dev)
+				continue;
+
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_PORT_MDB,
+					       mp, dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+	}
+
+	rcu_read_unlock();
+
+	list_for_each_entry(obj, &mdb_list, list) {
+		err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+					extack);
+		if (err)
+			goto out_free_mdb;
+	}
+
+out_free_mdb:
+	list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
+		list_del(&obj->list);
+		kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_mdb_replay);
+
 static void br_mdb_switchdev_host_port(struct net_device *dev,
 				       struct net_device *lower_dev,
 				       struct net_bridge_mdb_entry *mp,
@@ -515,18 +643,12 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
 		.obj = {
 			.id = SWITCHDEV_OBJ_ID_HOST_MDB,
 			.flags = SWITCHDEV_F_DEFER,
+			.orig_dev = dev,
 		},
-		.vid = mp->addr.vid,
 	};
 
-	if (mp->addr.proto == htons(ETH_P_IP))
-		ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
-#if IS_ENABLED(CONFIG_IPV6)
-	else
-		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
-#endif
+	br_switchdev_mdb_populate(&mdb, mp);
 
-	mdb.obj.orig_dev = dev;
 	switch (type) {
 	case RTM_NEWMDB:
 		switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
@@ -558,21 +680,13 @@ void br_mdb_notify(struct net_device *dev,
 			.id = SWITCHDEV_OBJ_ID_PORT_MDB,
 			.flags = SWITCHDEV_F_DEFER,
 		},
-		.vid = mp->addr.vid,
 	};
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
 	if (pg) {
-		if (mp->addr.proto == htons(ETH_P_IP))
-			ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
-#if IS_ENABLED(CONFIG_IPV6)
-		else if (mp->addr.proto == htons(ETH_P_IPV6))
-			ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
-#endif
-		else
-			ether_addr_copy(mdb.addr, mp->addr.dst.mac_addr);
+		br_switchdev_mdb_populate(&mdb, mp);
 
 		mdb.obj.orig_dev = pg->key.port->dev;
 		switch (type) {
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 12487f6fe9b4..cd2b1e424e54 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -411,6 +411,13 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
 	cancel_delayed_work_sync(&mrp->in_test_work);
 	br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0);
 
+	/* Disable the roles */
+	br_mrp_switchdev_set_ring_role(br, mrp, BR_MRP_RING_ROLE_DISABLED);
+	p = rtnl_dereference(mrp->i_port);
+	if (p)
+		br_mrp_switchdev_set_in_role(br, mrp, mrp->in_id, mrp->ring_id,
+					     BR_MRP_IN_ROLE_DISABLED);
+
 	br_mrp_switchdev_del(br, mrp);
 
 	/* Reset the ports */
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index fea38b9a7268..13290a749d09 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -498,11 +498,13 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
 					       &set_h->h_addr);
 }
 
-static void __eht_allow_incl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
+/* create new set entries from reports */
+static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+				     union net_bridge_eht_addr *h_addr,
+				     void *srcs,
+				     u32 nsrcs,
+				     size_t addr_size,
+				     int filter_mode)
 {
 	union net_bridge_eht_addr eht_src_addr;
 	u32 src_idx;
@@ -511,72 +513,17 @@ static void __eht_allow_incl(struct net_bridge_port_group *pg,
 	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
 		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
 		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-						  MCAST_INCLUDE,
+						  filter_mode,
 						  false);
 	}
 }
 
-static bool __eht_allow_excl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
-{
-	bool changed = false, host_excl = false;
-	union net_bridge_eht_addr eht_src_addr;
-	struct net_bridge_group_src *src_ent;
-	struct br_ip src_ip;
-	u32 src_idx;
-
-	host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
-	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (!host_excl) {
-			br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-							  MCAST_INCLUDE,
-							  false);
-		} else {
-			if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
-							    h_addr))
-				continue;
-			memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
-			src_ent = br_multicast_find_group_src(pg, &src_ip);
-			if (!src_ent)
-				continue;
-			br_multicast_del_group_src(src_ent, true);
-			changed = true;
-		}
-	}
-
-	return changed;
-}
-
-static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
-				   union net_bridge_eht_addr *h_addr,
-				   void *srcs,
-				   u32 nsrcs,
-				   size_t addr_size)
-{
-	bool changed = false;
-
-	switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
-	case MCAST_INCLUDE:
-		__eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size);
-		break;
-	case MCAST_EXCLUDE:
-		changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size);
-		break;
-	}
-
-	return changed;
-}
-
-static bool __eht_block_incl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
+/* delete existing set entries and their (S,G) entries if they were the last */
+static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
+				  union net_bridge_eht_addr *h_addr,
+				  void *srcs,
+				  u32 nsrcs,
+				  size_t addr_size)
 {
 	union net_bridge_eht_addr eht_src_addr;
 	struct net_bridge_group_src *src_ent;
@@ -602,39 +549,23 @@ static bool __eht_block_incl(struct net_bridge_port_group *pg,
 	return changed;
 }
 
-static bool __eht_block_excl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
+static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+				   union net_bridge_eht_addr *h_addr,
+				   void *srcs,
+				   u32 nsrcs,
+				   size_t addr_size)
 {
-	bool changed = false, host_excl = false;
-	union net_bridge_eht_addr eht_src_addr;
-	struct net_bridge_group_src *src_ent;
-	struct br_ip src_ip;
-	u32 src_idx;
+	bool changed = false;
 
-	host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
-	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
-	memset(&src_ip, 0, sizeof(src_ip));
-	src_ip.proto = pg->key.addr.proto;
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (host_excl) {
-			br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-							  MCAST_EXCLUDE,
-							  false);
-		} else {
-			if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
-							    h_addr))
-				continue;
-			memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
-			src_ent = br_multicast_find_group_src(pg, &src_ip);
-			if (!src_ent)
-				continue;
-			br_multicast_del_group_src(src_ent, true);
-			changed = true;
-		}
+	switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
+	case MCAST_INCLUDE:
+		__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+					 MCAST_INCLUDE);
+		break;
+	case MCAST_EXCLUDE:
+		changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
+						addr_size);
+		break;
 	}
 
 	return changed;
@@ -650,10 +581,12 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
 
 	switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
 	case MCAST_INCLUDE:
-		changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size);
+		changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
+						addr_size);
 		break;
 	case MCAST_EXCLUDE:
-		changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
+		__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+					 MCAST_EXCLUDE);
 		break;
 	}
 
@@ -671,7 +604,6 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
 {
 	bool changed = false, flush_entries = to_report;
 	union net_bridge_eht_addr eht_src_addr;
-	u32 src_idx;
 
 	if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode)
 		flush_entries = true;
@@ -680,11 +612,8 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
 	/* if we're changing mode del host and its entries */
 	if (flush_entries)
 		br_multicast_del_eht_host(pg, h_addr);
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-						  filter_mode, false);
-	}
+	__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+				 filter_mode);
 	/* we can be missing sets only if we've deleted some entries */
 	if (flush_entries) {
 		struct net_bridge *br = pg->key.port->br;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d7d167e10b70..50747990188e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1118,6 +1118,13 @@ void br_vlan_notify(const struct net_bridge *br,
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 			     const struct net_bridge_vlan *range_end);
 
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+				    struct net_device_path_ctx *ctx,
+				    struct net_device_path *path);
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+				   struct net_bridge_port *dst,
+				   struct net_device_path *path);
+
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
 {
@@ -1277,6 +1284,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
 	return 0;
 }
 
+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+						  struct net_device_path_ctx *ctx,
+						  struct net_device_path *path)
+{
+}
+
+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+						 struct net_bridge_port *dst,
+						 struct net_device_path *path)
+{
+	return 0;
+}
+
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
 {
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 21c6781906aa..3dafb6143cff 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -64,6 +64,20 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 	}
 }
 
+u8 br_port_get_stp_state(const struct net_device *dev)
+{
+	struct net_bridge_port *p;
+
+	ASSERT_RTNL();
+
+	p = br_port_get_rtnl(dev);
+	if (!p)
+		return BR_STATE_DISABLED;
+
+	return p->state;
+}
+EXPORT_SYMBOL_GPL(br_port_get_stp_state);
+
 /* called under bridge lock */
 struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
 {
@@ -625,6 +639,19 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
 	return 0;
 }
 
+clock_t br_get_ageing_time(struct net_device *br_dev)
+{
+	struct net_bridge *br;
+
+	if (!netif_is_bridge_master(br_dev))
+		return 0;
+
+	br = netdev_priv(br_dev);
+
+	return jiffies_to_clock_t(br->ageing_time);
+}
+EXPORT_SYMBOL_GPL(br_get_ageing_time);
+
 /* called under bridge lock */
 void __br_set_topology_change(struct net_bridge *br, unsigned char val)
 {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8829f621b8ec..da3256a3eed0 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1339,6 +1339,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
 
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+				    struct net_device_path_ctx *ctx,
+				    struct net_device_path *path)
+{
+	struct net_bridge_vlan_group *vg;
+	int idx = ctx->num_vlans - 1;
+	u16 vid;
+
+	path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+		return;
+
+	vg = br_vlan_group(br);
+
+	if (idx >= 0 &&
+	    ctx->vlan[idx].proto == br->vlan_proto) {
+		vid = ctx->vlan[idx].id;
+	} else {
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
+		vid = br_get_pvid(vg);
+	}
+
+	path->bridge.vlan_id = vid;
+	path->bridge.vlan_proto = br->vlan_proto;
+}
+
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+				   struct net_bridge_port *dst,
+				   struct net_device_path *path)
+{
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_vlan *v;
+
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+		return 0;
+
+	vg = nbp_vlan_group_rcu(dst);
+	v = br_vlan_find(vg, path->bridge.vlan_id);
+	if (!v || !br_vlan_should_use(v))
+		return -EINVAL;
+
+	if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
+		return 0;
+
+	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+	else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
+	else
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
+
+	return 0;
+}
+
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo)
 {
@@ -1751,6 +1806,79 @@ out_kfree:
 	kfree_skb(skb);
 }
 
+static int br_vlan_replay_one(struct notifier_block *nb,
+			      struct net_device *dev,
+			      struct switchdev_obj_port_vlan *vlan,
+			      struct netlink_ext_ack *extack)
+{
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+		},
+		.obj = &vlan->obj,
+	};
+	int err;
+
+	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	return notifier_to_errno(err);
+}
+
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+		   struct notifier_block *nb, struct netlink_ext_ack *extack)
+{
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_vlan *v;
+	struct net_bridge_port *p;
+	struct net_bridge *br;
+	int err = 0;
+	u16 pvid;
+
+	ASSERT_RTNL();
+
+	if (!netif_is_bridge_master(br_dev))
+		return -EINVAL;
+
+	if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	if (netif_is_bridge_master(dev)) {
+		br = netdev_priv(dev);
+		vg = br_vlan_group(br);
+		p = NULL;
+	} else {
+		p = br_port_get_rtnl(dev);
+		if (WARN_ON(!p))
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+		br = p->br;
+	}
+
+	if (!vg)
+		return 0;
+
+	pvid = br_get_pvid(vg);
+
+	list_for_each_entry(v, &vg->vlan_list, vlist) {
+		struct switchdev_obj_port_vlan vlan = {
+			.obj.orig_dev = dev,
+			.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+			.flags = br_vlan_flags(v, pvid),
+			.vid = v->vid,
+		};
+
+		if (!br_vlan_should_use(v))
+			continue;
+
+		err = br_vlan_replay_one(nb, dev, &vlan, extack);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_vlan_replay);
+
 /* check if v_curr can enter a range ending in range_end */
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 			     const struct net_bridge_vlan *range_end)
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 169e005fbda2..0d3a8c01552e 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -35,7 +35,7 @@ static const struct rhashtable_params br_vlan_tunnel_rht_params = {
 };
 
 static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
-						     u64 tunnel_id)
+						     __be64 tunnel_id)
 {
 	return rhashtable_lookup_fast(tbl, &tunnel_id,
 				      br_vlan_tunnel_rht_params);
diff --git a/net/core/Makefile b/net/core/Makefile
index 3e2c378e5f31..0c2233c826fd 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,7 +16,6 @@ obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
-obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
-obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
+ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
+obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
+endif
 obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 4edd033e899c..cc3712ad8716 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -89,7 +89,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0f72ff5d34ba..48b529d59157 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -848,6 +848,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+	int k = stack->num_paths++;
+
+	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+		return NULL;
+
+	return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack)
+{
+	const struct net_device *last_dev;
+	struct net_device_path_ctx ctx = {
+		.dev	= dev,
+		.daddr	= daddr,
+	};
+	struct net_device_path *path;
+	int ret = 0;
+
+	stack->num_paths = 0;
+	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+		last_dev = ctx.dev;
+		path = dev_fwd_path(stack);
+		if (!path)
+			return -1;
+
+		memset(path, 0, sizeof(struct net_device_path));
+		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+		if (ret < 0)
+			return -1;
+
+		if (WARN_ON_ONCE(last_dev == ctx.dev))
+			return -1;
+	}
+	path = dev_fwd_path(stack);
+	if (!path)
+		return -1;
+	path->type = DEV_PATH_ETHERNET;
+	path->dev = ctx.dev;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace
@@ -2463,16 +2509,14 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
-struct static_key xps_needed __read_mostly;
-EXPORT_SYMBOL(xps_needed);
-struct static_key xps_rxqs_needed __read_mostly;
-EXPORT_SYMBOL(xps_rxqs_needed);
+static struct static_key xps_needed __read_mostly;
+static struct static_key xps_rxqs_needed __read_mostly;
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
 static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
-			     int tci, u16 index)
+			     struct xps_dev_maps *old_maps, int tci, u16 index)
 {
 	struct xps_map *map = NULL;
 	int pos;
@@ -2491,6 +2535,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
+		if (old_maps)
+			RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
 		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
@@ -2503,7 +2549,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 				 struct xps_dev_maps *dev_maps,
 				 int cpu, u16 offset, u16 count)
 {
-	int num_tc = dev->num_tc ? : 1;
+	int num_tc = dev_maps->num_tc;
 	bool active = false;
 	int tci;
 
@@ -2511,7 +2557,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 		int i, j;
 
 		for (i = count, j = offset; i--; j++) {
-			if (!remove_xps_queue(dev_maps, tci, j))
+			if (!remove_xps_queue(dev_maps, NULL, tci, j))
 				break;
 		}
 
@@ -2523,74 +2569,54 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 
 static void reset_xps_maps(struct net_device *dev,
 			   struct xps_dev_maps *dev_maps,
-			   bool is_rxqs_map)
+			   enum xps_map_type type)
 {
-	if (is_rxqs_map) {
-		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-		RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-	} else {
-		RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
-	}
 	static_key_slow_dec_cpuslocked(&xps_needed);
+	if (type == XPS_RXQS)
+		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+
+	RCU_INIT_POINTER(dev->xps_maps[type], NULL);
+
 	kfree_rcu(dev_maps, rcu);
 }
 
-static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
-			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
-			   u16 offset, u16 count, bool is_rxqs_map)
+static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
+			   u16 offset, u16 count)
 {
+	struct xps_dev_maps *dev_maps;
 	bool active = false;
 	int i, j;
 
-	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
-	     j < nr_ids;)
-		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
-					       count);
+	dev_maps = xmap_dereference(dev->xps_maps[type]);
+	if (!dev_maps)
+		return;
+
+	for (j = 0; j < dev_maps->nr_ids; j++)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
 	if (!active)
-		reset_xps_maps(dev, dev_maps, is_rxqs_map);
+		reset_xps_maps(dev, dev_maps, type);
 
-	if (!is_rxqs_map) {
-		for (i = offset + (count - 1); count--; i--) {
+	if (type == XPS_CPUS) {
+		for (i = offset + (count - 1); count--; i--)
 			netdev_queue_numa_node_write(
-				netdev_get_tx_queue(dev, i),
-				NUMA_NO_NODE);
-		}
+				netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
 	}
 }
 
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
-	const unsigned long *possible_mask = NULL;
-	struct xps_dev_maps *dev_maps;
-	unsigned int nr_ids;
-
 	if (!static_key_false(&xps_needed))
 		return;
 
 	cpus_read_lock();
 	mutex_lock(&xps_map_mutex);
 
-	if (static_key_false(&xps_rxqs_needed)) {
-		dev_maps = xmap_dereference(dev->xps_rxqs_map);
-		if (dev_maps) {
-			nr_ids = dev->num_rx_queues;
-			clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
-				       offset, count, true);
-		}
-	}
-
-	dev_maps = xmap_dereference(dev->xps_cpus_map);
-	if (!dev_maps)
-		goto out_no_maps;
+	if (static_key_false(&xps_rxqs_needed))
+		clean_xps_maps(dev, XPS_RXQS, offset, count);
 
-	if (num_possible_cpus() > 1)
-		possible_mask = cpumask_bits(cpu_possible_mask);
-	nr_ids = nr_cpu_ids;
-	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
-		       false);
+	clean_xps_maps(dev, XPS_CPUS, offset, count);
 
-out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 	cpus_read_unlock();
 }
@@ -2640,16 +2666,35 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
 	return new_map;
 }
 
+/* Copy xps maps at a given index */
+static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
+			      struct xps_dev_maps *new_dev_maps, int index,
+			      int tc, bool skip_tc)
+{
+	int i, tci = index * dev_maps->num_tc;
+	struct xps_map *map;
+
+	/* copy maps belonging to foreign traffic classes */
+	for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+		if (i == tc && skip_tc)
+			continue;
+
+		/* fill in the new device map from the old device map */
+		map = xmap_dereference(dev_maps->attr_map[tci]);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
+	}
+}
+
 /* Must be called under cpus_read_lock */
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map)
+			  u16 index, enum xps_map_type type)
 {
-	const unsigned long *online_mask = NULL, *possible_mask = NULL;
-	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
+	const unsigned long *online_mask = NULL;
+	bool active = false, copy = false;
 	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
-	bool active = false;
 	unsigned int nr_ids;
 
 	if (dev->num_tc) {
@@ -2667,38 +2712,48 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 
 	mutex_lock(&xps_map_mutex);
-	if (is_rxqs_map) {
+
+	dev_maps = xmap_dereference(dev->xps_maps[type]);
+	if (type == XPS_RXQS) {
 		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
-		dev_maps = xmap_dereference(dev->xps_rxqs_map);
 		nr_ids = dev->num_rx_queues;
 	} else {
 		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
-		if (num_possible_cpus() > 1) {
+		if (num_possible_cpus() > 1)
 			online_mask = cpumask_bits(cpu_online_mask);
-			possible_mask = cpumask_bits(cpu_possible_mask);
-		}
-		dev_maps = xmap_dereference(dev->xps_cpus_map);
 		nr_ids = nr_cpu_ids;
 	}
 
 	if (maps_sz < L1_CACHE_BYTES)
 		maps_sz = L1_CACHE_BYTES;
 
+	/* The old dev_maps could be larger or smaller than the one we're
+	 * setting up now, as dev->num_tc or nr_ids could have been updated in
+	 * between. We could try to be smart, but let's be safe instead and only
+	 * copy foreign traffic classes if the two map sizes match.
+	 */
+	if (dev_maps &&
+	    dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
+		copy = true;
+
 	/* allocate memory for queue storage */
 	for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
 	     j < nr_ids;) {
-		if (!new_dev_maps)
-			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
-			mutex_unlock(&xps_map_mutex);
-			return -ENOMEM;
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+			if (!new_dev_maps) {
+				mutex_unlock(&xps_map_mutex);
+				return -ENOMEM;
+			}
+
+			new_dev_maps->nr_ids = nr_ids;
+			new_dev_maps->num_tc = num_tc;
 		}
 
 		tci = j * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
-				 NULL;
+		map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
 
-		map = expand_xps_map(map, j, index, is_rxqs_map);
+		map = expand_xps_map(map, j, index, type == XPS_RXQS);
 		if (!map)
 			goto error;
 
@@ -2711,29 +2766,21 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!dev_maps) {
 		/* Increment static keys at most once per type */
 		static_key_slow_inc_cpuslocked(&xps_needed);
-		if (is_rxqs_map)
+		if (type == XPS_RXQS)
 			static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
 	}
 
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
-		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-		}
+	for (j = 0; j < nr_ids; j++) {
+		bool skip_tc = false;
 
-		/* We need to explicitly update tci as prevous loop
-		 * could break out early if dev_maps is NULL.
-		 */
 		tci = j * num_tc + tc;
-
 		if (netif_attr_test_mask(j, mask, nr_ids) &&
 		    netif_attr_test_online(j, online_mask, nr_ids)) {
 			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
+			skip_tc = true;
+
 			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
@@ -2741,78 +2788,81 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (!is_rxqs_map) {
+			if (type == XPS_CPUS) {
 				if (numa_node_id == -2)
 					numa_node_id = cpu_to_node(j);
 				else if (numa_node_id != cpu_to_node(j))
 					numa_node_id = -1;
 			}
 #endif
-		} else if (dev_maps) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
-		/* copy maps belonging to foreign traffic classes */
-		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-		}
+		if (copy)
+			xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
+					  skip_tc);
 	}
 
-	if (is_rxqs_map)
-		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
-	else
-		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
+	rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
-		for (i = num_tc, tci = j * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+	for (j = 0; j < dev_maps->nr_ids; j++) {
+		for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
 			map = xmap_dereference(dev_maps->attr_map[tci]);
-			if (map && map != new_map)
-				kfree_rcu(map, rcu);
+			if (!map)
+				continue;
+
+			if (copy) {
+				new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+				if (map == new_map)
+					continue;
+			}
+
+			RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
+			kfree_rcu(map, rcu);
 		}
 	}
 
-	kfree_rcu(dev_maps, rcu);
+	old_dev_maps = dev_maps;
 
 out_no_old_maps:
 	dev_maps = new_dev_maps;
 	active = true;
 
 out_no_new_maps:
-	if (!is_rxqs_map) {
+	if (type == XPS_CPUS)
 		/* update Tx queue numa node */
 		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
 					     (numa_node_id >= 0) ?
 					     numa_node_id : NUMA_NO_NODE);
-	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
 	/* removes tx-queue from unused CPUs/rx-queues */
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
-		for (i = tc, tci = j * num_tc; i--; tci++)
-			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!netif_attr_test_mask(j, mask, nr_ids) ||
-		    !netif_attr_test_online(j, online_mask, nr_ids))
-			active |= remove_xps_queue(dev_maps, tci, index);
-		for (i = num_tc - tc, tci++; --i; tci++)
-			active |= remove_xps_queue(dev_maps, tci, index);
+	for (j = 0; j < dev_maps->nr_ids; j++) {
+		tci = j * dev_maps->num_tc;
+
+		for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+			if (i == tc &&
+			    netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
+			    netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
+				continue;
+
+			active |= remove_xps_queue(dev_maps,
+						   copy ? old_dev_maps : NULL,
+						   tci, index);
+		}
 	}
 
+	if (old_dev_maps)
+		kfree_rcu(old_dev_maps, rcu);
+
 	/* free map if not active */
 	if (!active)
-		reset_xps_maps(dev, dev_maps, is_rxqs_map);
+		reset_xps_maps(dev, dev_maps, type);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2820,11 +2870,10 @@ out_no_maps:
 	return 0;
 error:
 	/* remove any maps that we added */
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
+	for (j = 0; j < nr_ids; j++) {
 		for (i = num_tc, tci = j * num_tc; i--; tci++) {
 			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
-			map = dev_maps ?
+			map = copy ?
 			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
@@ -2845,7 +2894,7 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	int ret;
 
 	cpus_read_lock();
-	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
 	cpus_read_unlock();
 
 	return ret;
@@ -3956,13 +4005,15 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 			       struct xps_dev_maps *dev_maps, unsigned int tci)
 {
+	int tc = netdev_get_prio_tc_map(dev, skb->priority);
 	struct xps_map *map;
 	int queue_index = -1;
 
-	if (dev->num_tc) {
-		tci *= dev->num_tc;
-		tci += netdev_get_prio_tc_map(dev, skb->priority);
-	}
+	if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
+		return queue_index;
+
+	tci *= dev_maps->num_tc;
+	tci += tc;
 
 	map = rcu_dereference(dev_maps->attr_map[tci]);
 	if (map) {
@@ -3993,18 +4044,18 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 	if (!static_key_false(&xps_rxqs_needed))
 		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
-		if (tci >= 0 && tci < dev->num_rx_queues)
+		if (tci >= 0)
 			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
 							  tci);
 	}
 
 get_cpus_map:
 	if (queue_index < 0) {
-		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+		dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
 		if (dev_maps) {
 			unsigned int tci = skb->sender_cpu - 1;
 
@@ -5284,6 +5335,7 @@ skip_classify:
 			goto another_round;
 		case RX_HANDLER_EXACT:
 			deliver_exact = true;
+			break;
 		case RX_HANDLER_PASS:
 			break;
 		default:
@@ -5876,15 +5928,13 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-static struct list_head *gro_list_prepare(struct napi_struct *napi,
-					  struct sk_buff *skb)
+static void gro_list_prepare(const struct list_head *head,
+			     const struct sk_buff *skb)
 {
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
-	struct list_head *head;
 	struct sk_buff *p;
 
-	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
 	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
@@ -5910,8 +5960,6 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
-
-	return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -5974,11 +6022,11 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	struct gro_list *gro_list = &napi->gro_hash[bucket];
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
 	enum gro_result ret;
 	int same_flow;
@@ -5987,7 +6035,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_head = gro_list_prepare(napi, skb);
+	gro_list_prepare(&gro_list->list, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -6023,7 +6071,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
 					ipv6_gro_receive, inet_gro_receive,
-					gro_head, skb);
+					&gro_list->list, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -6042,7 +6090,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (pp) {
 		skb_list_del_init(pp);
 		napi_gro_complete(napi, pp);
-		napi->gro_hash[hash].count--;
+		gro_list->count--;
 	}
 
 	if (same_flow)
@@ -6051,16 +6099,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
-		gro_flush_oldest(napi, gro_head);
-	} else {
-		napi->gro_hash[hash].count++;
-	}
+	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
+		gro_flush_oldest(napi, &gro_list->list);
+	else
+		gro_list->count++;
+
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	list_add(&skb->list, gro_head);
+	list_add(&skb->list, &gro_list->list);
 	ret = GRO_HELD;
 
 pull:
@@ -6068,11 +6116,11 @@ pull:
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
-	if (napi->gro_hash[hash].count) {
-		if (!test_bit(hash, &napi->gro_bitmask))
-			__set_bit(hash, &napi->gro_bitmask);
-	} else if (test_bit(hash, &napi->gro_bitmask)) {
-		__clear_bit(hash, &napi->gro_bitmask);
+	if (gro_list->count) {
+		if (!test_bit(bucket, &napi->gro_bitmask))
+			__set_bit(bucket, &napi->gro_bitmask);
+	} else if (test_bit(bucket, &napi->gro_bitmask)) {
+		__clear_bit(bucket, &napi->gro_bitmask);
 	}
 
 	return ret;
@@ -6789,6 +6837,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
 
 	return err;
 }
+EXPORT_SYMBOL(dev_set_threaded);
 
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
@@ -10336,14 +10385,20 @@ EXPORT_SYMBOL(register_netdev);
 
 int netdev_refcnt_read(const struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	int i, refcnt = 0;
 
 	for_each_possible_cpu(i)
 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
 	return refcnt;
+#else
+	return refcount_read(&dev->dev_refcnt);
+#endif
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+int netdev_unregister_timeout_secs __read_mostly = 10;
+
 #define WAIT_REFS_MIN_MSECS 1
 #define WAIT_REFS_MAX_MSECS 250
 /**
@@ -10368,7 +10423,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 	rebroadcast_time = warning_time = jiffies;
 	refcnt = netdev_refcnt_read(dev);
 
-	while (refcnt != 0) {
+	while (refcnt != 1) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
@@ -10405,7 +10460,9 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
+		if (refcnt &&
+		    time_after(jiffies, warning_time +
+			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
 			warning_time = jiffies;
@@ -10481,7 +10538,7 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(netdev_refcnt_read(dev));
+		BUG_ON(netdev_refcnt_read(dev) != 1);
 		BUG_ON(!list_empty(&dev->ptype_all));
 		BUG_ON(!list_empty(&dev->ptype_specific));
 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
@@ -10698,9 +10755,14 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
 		goto free_dev;
+	dev_hold(dev);
+#else
+	refcount_set(&dev->dev_refcnt, 1);
+#endif
 
 	if (dev_addr_init(dev))
 		goto free_pcpu;
@@ -10764,8 +10826,10 @@ free_all:
 	return NULL;
 
 free_pcpu:
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 free_dev:
+#endif
 	netdev_freemem(dev);
 	return NULL;
 }
@@ -10807,8 +10871,10 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
+#endif
 	free_percpu(dev->xdp_bulkq);
 	dev->xdp_bulkq = NULL;
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index db65ce62b625..ead2a8aa57b4 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1754,7 +1754,7 @@ static void exit_net_drop_monitor(void)
 
 	/*
 	 * Because of the module_get/put we do in the trace state change path
-	 * we are guarnateed not to have any current users when we get here
+	 * we are guaranteed not to have any current users when we get here
 	 */
 
 	for_each_possible_cpu(cpu) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 9323d34d34cc..f5eeebf6a16f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = {
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
-	int err = __bpf_try_make_writable(skb, write_len);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return err;
+	return __bpf_try_make_writable(skb, write_len);
 }
 
 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
@@ -3412,6 +3409,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+					 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 					 BPF_F_ADJ_ROOM_ENCAP_L2( \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3448,6 +3446,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
 			return -EINVAL;
 
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+		    inner_mac_len < ETH_HLEN)
+			return -EINVAL;
+
 		if (skb->encapsulation)
 			return -EALREADY;
 
@@ -3466,7 +3468,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 		skb->inner_mac_header = inner_net - inner_mac_len;
 		skb->inner_network_header = inner_net;
 		skb->inner_transport_header = inner_trans;
-		skb_set_inner_protocol(skb, skb->protocol);
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+			skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+		else
+			skb_set_inner_protocol(skb, skb->protocol);
 
 		skb->encapsulation = 1;
 		skb_set_network_header(skb, mac_len);
@@ -3577,7 +3583,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 			return -ENOMEM;
 		__skb_pull(skb, len_diff_abs);
 	}
-	bpf_compute_data_end_sk_skb(skb);
 	if (tls_sw_has_ctx_rx(skb->sk)) {
 		struct strp_msg *rxm = strp_msg(skb);
 
@@ -3742,10 +3747,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_tail(skb, new_len, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_tail(skb, new_len, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_tail_proto = {
@@ -3808,10 +3810,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_head(skb, head_room, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_head(skb, head_room, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_head_proto = {
@@ -3919,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
-			    struct bpf_map *map, struct xdp_buff *xdp)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return dev_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_CPUMAP:
-		return cpu_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_redirect(fwd, xdp);
-	default:
-		return -EBADRQC;
-	}
-	return 0;
-}
-
 void xdp_do_flush(void)
 {
 	__dev_flush();
@@ -3944,71 +3926,52 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
-static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-		return __dev_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return __dev_map_hash_lookup_elem(map, index);
-	case BPF_MAP_TYPE_CPUMAP:
-		return __cpu_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_lookup_elem(map, index);
-	default:
-		return NULL;
-	}
-}
-
-void bpf_clear_redirect_map(struct bpf_map *map)
-{
-	struct bpf_redirect_info *ri;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
-		/* Avoid polluting remote cacheline due to writes if
-		 * not needed. Once we pass this test, we need the
-		 * cmpxchg() to make sure it hasn't been changed in
-		 * the meantime by remote CPU.
-		 */
-		if (unlikely(READ_ONCE(ri->map) == map))
-			cmpxchg(&ri->map, map, NULL);
-	}
-}
-
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
+	enum bpf_map_type map_type = ri->map_type;
 	void *fwd = ri->tgt_value;
+	u32 map_id = ri->map_id;
 	int err;
 
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
-	if (unlikely(!map)) {
-		fwd = dev_get_by_index_rcu(dev_net(dev), index);
-		if (unlikely(!fwd)) {
-			err = -EINVAL;
-			goto err;
+	switch (map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		fallthrough;
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		err = dev_map_enqueue(fwd, xdp, dev);
+		break;
+	case BPF_MAP_TYPE_CPUMAP:
+		err = cpu_map_enqueue(fwd, xdp, dev);
+		break;
+	case BPF_MAP_TYPE_XSKMAP:
+		err = __xsk_map_redirect(fwd, xdp);
+		break;
+	case BPF_MAP_TYPE_UNSPEC:
+		if (map_id == INT_MAX) {
+			fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
+			if (unlikely(!fwd)) {
+				err = -EINVAL;
+				break;
+			}
+			err = dev_xdp_enqueue(fwd, xdp, dev);
+			break;
 		}
-
-		err = dev_xdp_enqueue(fwd, xdp, dev);
-	} else {
-		err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+		fallthrough;
+	default:
+		err = -EBADRQC;
 	}
 
 	if (unlikely(err))
 		goto err;
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4017,41 +3980,36 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
 				       struct bpf_prog *xdp_prog,
-				       struct bpf_map *map)
+				       void *fwd,
+				       enum bpf_map_type map_type, u32 map_id)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	u32 index = ri->tgt_index;
-	void *fwd = ri->tgt_value;
-	int err = 0;
-
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
-
-	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
-	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-		struct bpf_dtab_netdev *dst = fwd;
+	int err;
 
-		err = dev_map_generic_redirect(dst, skb, xdp_prog);
+	switch (map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		fallthrough;
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		err = dev_map_generic_redirect(fwd, skb, xdp_prog);
 		if (unlikely(err))
 			goto err;
-	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
-		struct xdp_sock *xs = fwd;
-
-		err = xsk_generic_rcv(xs, xdp);
+		break;
+	case BPF_MAP_TYPE_XSKMAP:
+		err = xsk_generic_rcv(fwd, xdp);
 		if (err)
 			goto err;
 		consume_skb(skb);
-	} else {
+		break;
+	default:
 		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
 		err = -EBADRQC;
 		goto err;
 	}
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
 	return err;
 }
 
@@ -4059,31 +4017,34 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
-	struct net_device *fwd;
-	int err = 0;
-
-	if (map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
-						   map);
-	ri->tgt_index = 0;
-	fwd = dev_get_by_index_rcu(dev_net(dev), index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
+	enum bpf_map_type map_type = ri->map_type;
+	void *fwd = ri->tgt_value;
+	u32 map_id = ri->map_id;
+	int err;
 
-	err = xdp_ok_fwd_dev(fwd, skb->len);
-	if (unlikely(err))
-		goto err;
+	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
-	skb->dev = fwd;
-	_trace_xdp_redirect(dev, xdp_prog, index);
-	generic_xdp_tx(skb, xdp_prog);
-	return 0;
+	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+		fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
+		if (unlikely(!fwd)) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		err = xdp_ok_fwd_dev(fwd, skb->len);
+		if (unlikely(err))
+			goto err;
+
+		skb->dev = fwd;
+		_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
+		generic_xdp_tx(skb, xdp_prog);
+		return 0;
+	}
+
+	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
 err:
-	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
 	return err;
 }
 
@@ -4094,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 	if (unlikely(flags))
 		return XDP_ABORTED;
 
-	ri->flags = flags;
+	/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
+	 * by map_idr) is used for ifindex based XDP redirect.
+	 */
 	ri->tgt_index = ifindex;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->map_id = INT_MAX;
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
 	return XDP_REDIRECT;
 }
@@ -4113,28 +4076,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 	   u64, flags)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-
-	/* Lower bits of the flags are used as return code on lookup failure */
-	if (unlikely(flags > XDP_TX))
-		return XDP_ABORTED;
-
-	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
-	if (unlikely(!ri->tgt_value)) {
-		/* If the lookup fails we want to clear out the state in the
-		 * redirect_info struct completely, so that if an eBPF program
-		 * performs multiple lookups, the last one always takes
-		 * precedence.
-		 */
-		WRITE_ONCE(ri->map, NULL);
-		return flags;
-	}
-
-	ri->flags = flags;
-	ri->tgt_index = ifindex;
-	WRITE_ONCE(ri->map, map);
-
-	return XDP_REDIRECT;
+	return map->ops->map_redirect(map, ifindex, flags);
 }
 
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
@@ -9663,22 +9605,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+/* data_end = skb->data + skb_headlen() */
+static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
+						    struct bpf_insn *insn)
+{
+	/* si->dst_reg = skb->data */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+			      si->dst_reg, si->src_reg,
+			      offsetof(struct sk_buff, data));
+	/* AX = skb->len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, len));
+	/* si->dst_reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* AX = skb->data_len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, data_len));
+	/* si->dst_reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+	return insn;
+}
+
 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     const struct bpf_insn *si,
 				     struct bpf_insn *insn_buf,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
-	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
-		off  = si->off;
-		off -= offsetof(struct __sk_buff, data_end);
-		off += offsetof(struct sk_buff, cb);
-		off += offsetof(struct tcp_skb_cb, bpf.data_end);
-		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
-				      si->src_reg, off);
+		insn = bpf_convert_data_end_access(si, insn);
 		break;
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
@@ -10457,6 +10417,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
 }
 
 const struct bpf_prog_ops sk_lookup_prog_ops = {
+	.test_run = bpf_prog_test_run_sk_lookup,
 };
 
 const struct bpf_verifier_ops sk_lookup_verifier_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a96a4f5de0ce..5985029e43d4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -114,7 +114,7 @@ int flow_dissector_bpf_prog_attach_check(struct net *net,
  * is the protocol port offset returned from proto_ports_offset
  */
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen)
+			    const void *data, int hlen)
 {
 	int poff = proto_ports_offset(ip_proto);
 
@@ -161,7 +161,7 @@ static bool icmp_has_id(u8 type)
  */
 void skb_flow_get_icmp_tci(const struct sk_buff *skb,
 			   struct flow_dissector_key_icmp *key_icmp,
-			   void *data, int thoff, int hlen)
+			   const void *data, int thoff, int hlen)
 {
 	struct icmphdr *ih, _ih;
 
@@ -187,8 +187,8 @@ EXPORT_SYMBOL(skb_flow_get_icmp_tci);
  */
 static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
 				    struct flow_dissector *flow_dissector,
-				    void *target_container,
-				    void *data, int thoff, int hlen)
+				    void *target_container, const void *data,
+				    int thoff, int hlen)
 {
 	struct flow_dissector_key_icmp *key_icmp;
 
@@ -409,8 +409,8 @@ EXPORT_SYMBOL(skb_flow_dissect_hash);
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, int nhoff, int hlen,
-			int lse_index, bool *entropy_label)
+			void *target_container, const void *data, int nhoff,
+			int hlen, int lse_index, bool *entropy_label)
 {
 	struct mpls_label *hdr, _hdr;
 	u32 entry, label, bos;
@@ -467,7 +467,8 @@ __skb_flow_dissect_mpls(const struct sk_buff *skb,
 static enum flow_dissect_ret
 __skb_flow_dissect_arp(const struct sk_buff *skb,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data, int nhoff, int hlen)
+		       void *target_container, const void *data,
+		       int nhoff, int hlen)
 {
 	struct flow_dissector_key_arp *key_arp;
 	struct {
@@ -523,7 +524,7 @@ static enum flow_dissect_ret
 __skb_flow_dissect_gre(const struct sk_buff *skb,
 		       struct flow_dissector_key_control *key_control,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data,
+		       void *target_container, const void *data,
 		       __be16 *p_proto, int *p_nhoff, int *p_hlen,
 		       unsigned int flags)
 {
@@ -663,8 +664,8 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 static enum flow_dissect_ret
 __skb_flow_dissect_batadv(const struct sk_buff *skb,
 			  struct flow_dissector_key_control *key_control,
-			  void *data, __be16 *p_proto, int *p_nhoff, int hlen,
-			  unsigned int flags)
+			  const void *data, __be16 *p_proto, int *p_nhoff,
+			  int hlen, unsigned int flags)
 {
 	struct {
 		struct batadv_unicast_packet batadv_unicast;
@@ -695,7 +696,8 @@ __skb_flow_dissect_batadv(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_tcp(const struct sk_buff *skb,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data, int thoff, int hlen)
+		       void *target_container, const void *data,
+		       int thoff, int hlen)
 {
 	struct flow_dissector_key_tcp *key_tcp;
 	struct tcphdr *th, _th;
@@ -719,8 +721,8 @@ __skb_flow_dissect_tcp(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ports(const struct sk_buff *skb,
 			 struct flow_dissector *flow_dissector,
-			 void *target_container, void *data, int nhoff,
-			 u8 ip_proto, int hlen)
+			 void *target_container, const void *data,
+			 int nhoff, u8 ip_proto, int hlen)
 {
 	enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
 	struct flow_dissector_key_ports *key_ports;
@@ -744,7 +746,8 @@ __skb_flow_dissect_ports(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ipv4(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, const struct iphdr *iph)
+			void *target_container, const void *data,
+			const struct iphdr *iph)
 {
 	struct flow_dissector_key_ip *key_ip;
 
@@ -761,7 +764,8 @@ __skb_flow_dissect_ipv4(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ipv6(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, const struct ipv6hdr *iph)
+			void *target_container, const void *data,
+			const struct ipv6hdr *iph)
 {
 	struct flow_dissector_key_ip *key_ip;
 
@@ -908,9 +912,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 bool __skb_flow_dissect(const struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container,
-			void *data, __be16 proto, int nhoff, int hlen,
-			unsigned int flags)
+			void *target_container, const void *data,
+			__be16 proto, int nhoff, int hlen, unsigned int flags)
 {
 	struct flow_dissector_key_control *key_control;
 	struct flow_dissector_key_basic *key_basic;
@@ -1642,7 +1645,7 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
 		   const struct flow_keys_basic *keys, int hlen)
 {
 	u32 poff = keys->control.thoff;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index c714e6a9dad4..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -10,9 +10,6 @@
 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
 
-extern struct list_head ptype_all __read_mostly;
-extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-
 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 307628fdf380..f6197774048b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1361,83 +1361,94 @@ static const struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static ssize_t xps_cpus_show(struct netdev_queue *queue,
-			     char *buf)
+static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
+			      int tc, char *buf, enum xps_map_type type)
 {
-	int cpu, len, ret, num_tc = 1, tc = 0;
-	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
-	cpumask_var_t mask;
-	unsigned long index;
-
-	if (!netif_is_multiqueue(dev))
-		return -ENOENT;
+	unsigned long *mask;
+	unsigned int nr_ids;
+	int j, len;
 
-	index = get_netdev_queue_index(queue);
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps[type]);
 
-	if (!rtnl_trylock())
-		return restart_syscall();
+	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
+	 * when dev_maps hasn't been allocated yet, to be backward compatible.
+	 */
+	nr_ids = dev_maps ? dev_maps->nr_ids :
+		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);
 
-	if (dev->num_tc) {
-		/* Do not allow XPS on subordinate device directly */
-		num_tc = dev->num_tc;
-		if (num_tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
+	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
+	if (!mask) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
 
-		/* If queue belongs to subordinate dev use its map */
-		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+	if (!dev_maps || tc >= dev_maps->num_tc)
+		goto out_no_maps;
 
-		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
-	}
+	for (j = 0; j < nr_ids; j++) {
+		int i, tci = j * dev_maps->num_tc + tc;
+		struct xps_map *map;
 
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_rtnl_unlock;
-	}
+		map = rcu_dereference(dev_maps->attr_map[tci]);
+		if (!map)
+			continue;
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
-	if (dev_maps) {
-		for_each_possible_cpu(cpu) {
-			int i, tci = cpu * num_tc + tc;
-			struct xps_map *map;
-
-			map = rcu_dereference(dev_maps->attr_map[tci]);
-			if (!map)
-				continue;
-
-			for (i = map->len; i--;) {
-				if (map->queues[i] == index) {
-					cpumask_set_cpu(cpu, mask);
-					break;
-				}
+		for (i = map->len; i--;) {
+			if (map->queues[i] == index) {
+				set_bit(j, mask);
+				break;
 			}
 		}
 	}
+out_no_maps:
 	rcu_read_unlock();
 
-	rtnl_unlock();
+	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
+	bitmap_free(mask);
 
-	len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
-	free_cpumask_var(mask);
 	return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	unsigned int index;
+	int len, tc;
+
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
 
-err_rtnl_unlock:
+	index = get_netdev_queue_index(queue);
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	/* If queue belongs to subordinate dev use its map */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
+	tc = netdev_txq_to_tc(dev, index);
+	if (tc < 0) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+	/* Make sure the subordinate device can't be freed */
+	get_device(&dev->dev);
 	rtnl_unlock();
-	return ret;
+
+	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
+
+	put_device(&dev->dev);
+	return len;
 }
 
 static ssize_t xps_cpus_store(struct netdev_queue *queue,
 			      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
-	unsigned long index;
+	unsigned int index;
 	cpumask_var_t mask;
 	int err;
 
@@ -1476,64 +1487,21 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 
 static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 {
-	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	unsigned long *mask, index;
+	unsigned int index;
+	int tc;
 
 	index = get_netdev_queue_index(queue);
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (dev->num_tc) {
-		num_tc = dev->num_tc;
-		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
-	}
-	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
-	if (!mask) {
-		ret = -ENOMEM;
-		goto err_rtnl_unlock;
-	}
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
-	if (!dev_maps)
-		goto out_no_maps;
-
-	for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
-	     j < dev->num_rx_queues;) {
-		int i, tci = j * num_tc + tc;
-		struct xps_map *map;
-
-		map = rcu_dereference(dev_maps->attr_map[tci]);
-		if (!map)
-			continue;
-
-		for (i = map->len; i--;) {
-			if (map->queues[i] == index) {
-				set_bit(j, mask);
-				break;
-			}
-		}
-	}
-out_no_maps:
-	rcu_read_unlock();
-
+	tc = netdev_txq_to_tc(dev, index);
 	rtnl_unlock();
+	if (tc < 0)
+		return -EINVAL;
 
-	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
-	bitmap_free(mask);
-
-	return len < PAGE_SIZE ? len : -EINVAL;
-
-err_rtnl_unlock:
-	rtnl_unlock();
-	return ret;
+	return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
 }
 
 static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
@@ -1541,7 +1509,8 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 {
 	struct net_device *dev = queue->dev;
 	struct net *net = dev_net(dev);
-	unsigned long *mask, index;
+	unsigned long *mask;
+	unsigned int index;
 	int err;
 
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -1565,7 +1534,7 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 	}
 
 	cpus_read_lock();
-	err = __netif_set_xps_queue(dev, mask, index, true);
+	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
 	cpus_read_unlock();
 
 	rtnl_unlock();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c421c8f80925..e8320b5d651a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3732,13 +3732,13 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int delta_truesize = 0;
 	unsigned int delta_len = 0;
-	struct sk_buff *tail = NULL;
 	struct sk_buff *nskb, *tmp;
 	int err;
 
 	skb_push(skb, -skb_network_offset(skb) + offset);
 
 	skb_shinfo(skb)->frag_list = NULL;
+	skb->next = list_skb;
 
 	do {
 		nskb = list_skb;
@@ -3756,17 +3756,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 			}
 		}
 
-		if (!tail)
-			skb->next = nskb;
-		else
-			tail->next = nskb;
-
-		if (unlikely(err)) {
-			nskb->next = list_skb;
+		if (unlikely(err))
 			goto err_linearize;
-		}
-
-		tail = nskb;
 
 		delta_len += nskb->len;
 		delta_truesize += nskb->truesize;
@@ -3793,7 +3784,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 
 	skb_gso_reset(skb);
 
-	skb->prev = tail;
+	skb->prev = nskb;
 
 	if (skb_needs_linearize(skb, features) &&
 	    __skb_linearize(skb))
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 1261512d6807..07f54015238a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -525,7 +525,8 @@ static void sk_psock_backlog(struct work_struct *work)
 		len = skb->len;
 		off = 0;
 start:
-		ingress = tcp_skb_bpf_ingress(skb);
+		ingress = skb_bpf_ingress(skb);
+		skb_bpf_redirect_clear(skb);
 		do {
 			ret = -EIO;
 			if (likely(psock->sk->sk_socket))
@@ -618,7 +619,7 @@ struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
 	return link;
 }
 
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 {
 	struct sk_msg *msg, *tmp;
 
@@ -631,7 +632,12 @@ void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
 static void sk_psock_zap_ingress(struct sk_psock *psock)
 {
-	__skb_queue_purge(&psock->ingress_skb);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
+		skb_bpf_redirect_clear(skb);
+		kfree_skb(skb);
+	}
 	__sk_psock_purge_ingress_msg(psock);
 }
 
@@ -645,15 +651,15 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
 	}
 }
 
+static void sk_psock_done_strp(struct sk_psock *psock);
+
 static void sk_psock_destroy_deferred(struct work_struct *gc)
 {
 	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 
 	/* No sk_callback_lock since already detached. */
 
-	/* Parser has been stopped */
-	if (psock->progs.skb_parser)
-		strp_done(&psock->parser.strp);
+	sk_psock_done_strp(psock);
 
 	cancel_work_sync(&psock->work);
 
@@ -685,9 +691,9 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 	write_lock_bh(&sk->sk_callback_lock);
 	sk_psock_restore_proto(sk, psock);
 	rcu_assign_sk_user_data(sk, NULL);
-	if (psock->progs.skb_parser)
+	if (psock->progs.stream_parser)
 		sk_psock_stop_strp(sk, psock);
-	else if (psock->progs.skb_verdict)
+	else if (psock->progs.stream_verdict)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
@@ -743,27 +749,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 
-static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
-			    struct sk_buff *skb)
-{
-	bpf_compute_data_end_sk_skb(skb);
-	return bpf_prog_run_pin_on_cpu(prog, skb);
-}
-
-static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
-{
-	struct sk_psock_parser *parser;
-
-	parser = container_of(strp, struct sk_psock_parser, strp);
-	return container_of(parser, struct sk_psock, parser);
-}
-
 static void sk_psock_skb_redirect(struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
 
-	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	sk_other = skb_bpf_redirect_fetch(skb);
 	/* This error is a buggy BPF program, it returned a redirect
 	 * return code, but then didn't set a redirect interface.
 	 */
@@ -806,16 +797,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 	int ret = __SK_PASS;
 
 	rcu_read_lock();
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
 		/* We skip full set_owner_r here because if we do a SK_PASS
 		 * or SK_DROP we can skip skb memory accounting and use the
 		 * TLS context.
 		 */
 		skb->sk = psock->sk;
-		tcp_skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
 	sk_psock_tls_verdict_apply(skb, psock->sk, ret);
@@ -827,7 +819,6 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
 static void sk_psock_verdict_apply(struct sk_psock *psock,
 				   struct sk_buff *skb, int verdict)
 {
-	struct tcp_skb_cb *tcp;
 	struct sock *sk_other;
 	int err = -EIO;
 
@@ -839,8 +830,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 			goto out_free;
 		}
 
-		tcp = TCP_SKB_CB(skb);
-		tcp->bpf.flags |= BPF_F_INGRESS;
+		skb_bpf_set_ingress(skb);
 
 		/* If the queue is empty then we can submit directly
 		 * into the msg queue. If its not empty we have to
@@ -866,6 +856,24 @@ out_free:
 	}
 }
 
+static void sk_psock_write_space(struct sock *sk)
+{
+	struct sk_psock *psock;
+	void (*write_space)(struct sock *sk) = NULL;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock)) {
+		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+			schedule_work(&psock->work);
+		write_space = psock->saved_write_space;
+	}
+	rcu_read_unlock();
+	if (write_space)
+		write_space(sk);
+}
+
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 {
 	struct sk_psock *psock;
@@ -881,11 +889,12 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		goto out;
 	}
 	skb_set_owner_r(skb, sk);
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
-		tcp_skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
@@ -899,15 +908,15 @@ static int sk_psock_strp_read_done(struct strparser *strp, int err)
 
 static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 {
-	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
 	struct bpf_prog *prog;
 	int ret = skb->len;
 
 	rcu_read_lock();
-	prog = READ_ONCE(psock->progs.skb_parser);
+	prog = READ_ONCE(psock->progs.stream_parser);
 	if (likely(prog)) {
 		skb->sk = psock->sk;
-		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		skb->sk = NULL;
 	}
 	rcu_read_unlock();
@@ -923,16 +932,59 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	psock = sk_psock(sk);
 	if (likely(psock)) {
 		if (tls_sw_has_ctx_rx(sk)) {
-			psock->parser.saved_data_ready(sk);
+			psock->saved_data_ready(sk);
 		} else {
 			write_lock_bh(&sk->sk_callback_lock);
-			strp_data_ready(&psock->parser.strp);
+			strp_data_ready(&psock->strp);
 			write_unlock_bh(&sk->sk_callback_lock);
 		}
 	}
 	rcu_read_unlock();
 }
 
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	static const struct strp_callbacks cb = {
+		.rcv_msg	= sk_psock_strp_read,
+		.read_sock_done	= sk_psock_strp_read_done,
+		.parse_msg	= sk_psock_strp_parse,
+	};
+
+	return strp_init(&psock->strp, sk, &cb);
+}
+
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+	if (psock->saved_data_ready)
+		return;
+
+	psock->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = sk_psock_strp_data_ready;
+	sk->sk_write_space = sk_psock_write_space;
+}
+
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+	if (!psock->saved_data_ready)
+		return;
+
+	sk->sk_data_ready = psock->saved_data_ready;
+	psock->saved_data_ready = NULL;
+	strp_stop(&psock->strp);
+}
+
+static void sk_psock_done_strp(struct sk_psock *psock)
+{
+	/* Parser has been stopped */
+	if (psock->progs.stream_parser)
+		strp_done(&psock->strp);
+}
+#else
+static void sk_psock_done_strp(struct sk_psock *psock)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
 static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 				 unsigned int offset, size_t orig_len)
 {
@@ -957,11 +1009,12 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 		goto out;
 	}
 	skb_set_owner_r(skb, sk);
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
-		tcp_skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
@@ -984,82 +1037,21 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
 	sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
 }
 
-static void sk_psock_write_space(struct sock *sk)
-{
-	struct sk_psock *psock;
-	void (*write_space)(struct sock *sk) = NULL;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (likely(psock)) {
-		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
-			schedule_work(&psock->work);
-		write_space = psock->saved_write_space;
-	}
-	rcu_read_unlock();
-	if (write_space)
-		write_space(sk);
-}
-
-int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
-{
-	static const struct strp_callbacks cb = {
-		.rcv_msg	= sk_psock_strp_read,
-		.read_sock_done	= sk_psock_strp_read_done,
-		.parse_msg	= sk_psock_strp_parse,
-	};
-
-	psock->parser.enabled = false;
-	return strp_init(&psock->parser.strp, sk, &cb);
-}
-
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (parser->enabled)
+	if (psock->saved_data_ready)
 		return;
 
-	parser->saved_data_ready = sk->sk_data_ready;
+	psock->saved_data_ready = sk->sk_data_ready;
 	sk->sk_data_ready = sk_psock_verdict_data_ready;
 	sk->sk_write_space = sk_psock_write_space;
-	parser->enabled = true;
-}
-
-void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
-{
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (parser->enabled)
-		return;
-
-	parser->saved_data_ready = sk->sk_data_ready;
-	sk->sk_data_ready = sk_psock_strp_data_ready;
-	sk->sk_write_space = sk_psock_write_space;
-	parser->enabled = true;
-}
-
-void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
-{
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (!parser->enabled)
-		return;
-
-	sk->sk_data_ready = parser->saved_data_ready;
-	parser->saved_data_ready = NULL;
-	strp_stop(&parser->strp);
-	parser->enabled = false;
 }
 
 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (!parser->enabled)
+	if (!psock->saved_data_ready)
 		return;
 
-	sk->sk_data_ready = parser->saved_data_ready;
-	parser->saved_data_ready = NULL;
-	parser->enabled = false;
+	sk->sk_data_ready = psock->saved_data_ready;
+	psock->saved_data_ready = NULL;
 }
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d758fb83c884..dd53a7771d7e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,6 +24,9 @@ struct bpf_stab {
 #define SOCK_CREATE_FLAG_MASK				\
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which);
+
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_stab *stab;
@@ -148,9 +151,9 @@ static void sock_map_del_link(struct sock *sk,
 			struct bpf_map *map = link->map;
 			struct bpf_stab *stab = container_of(map, struct bpf_stab,
 							     map);
-			if (psock->parser.enabled && stab->progs.skb_parser)
+			if (psock->saved_data_ready && stab->progs.stream_parser)
 				strp_stop = true;
-			if (psock->parser.enabled && stab->progs.skb_verdict)
+			if (psock->saved_data_ready && stab->progs.stream_verdict)
 				verdict_stop = true;
 			list_del(&link->list);
 			sk_psock_free_link(link);
@@ -224,23 +227,23 @@ out:
 static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			 struct sock *sk)
 {
-	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
+	struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
 	struct sk_psock *psock;
 	int ret;
 
-	skb_verdict = READ_ONCE(progs->skb_verdict);
-	if (skb_verdict) {
-		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
-		if (IS_ERR(skb_verdict))
-			return PTR_ERR(skb_verdict);
+	stream_verdict = READ_ONCE(progs->stream_verdict);
+	if (stream_verdict) {
+		stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
+		if (IS_ERR(stream_verdict))
+			return PTR_ERR(stream_verdict);
 	}
 
-	skb_parser = READ_ONCE(progs->skb_parser);
-	if (skb_parser) {
-		skb_parser = bpf_prog_inc_not_zero(skb_parser);
-		if (IS_ERR(skb_parser)) {
-			ret = PTR_ERR(skb_parser);
-			goto out_put_skb_verdict;
+	stream_parser = READ_ONCE(progs->stream_parser);
+	if (stream_parser) {
+		stream_parser = bpf_prog_inc_not_zero(stream_parser);
+		if (IS_ERR(stream_parser)) {
+			ret = PTR_ERR(stream_parser);
+			goto out_put_stream_verdict;
 		}
 	}
 
@@ -249,7 +252,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		msg_parser = bpf_prog_inc_not_zero(msg_parser);
 		if (IS_ERR(msg_parser)) {
 			ret = PTR_ERR(msg_parser);
-			goto out_put_skb_parser;
+			goto out_put_stream_parser;
 		}
 	}
 
@@ -261,8 +264,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 
 	if (psock) {
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
-		    (skb_parser  && READ_ONCE(psock->progs.skb_parser)) ||
-		    (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
+		    (stream_parser  && READ_ONCE(psock->progs.stream_parser)) ||
+		    (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
 			sk_psock_put(sk, psock);
 			ret = -EBUSY;
 			goto out_progs;
@@ -283,15 +286,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		goto out_drop;
 
 	write_lock_bh(&sk->sk_callback_lock);
-	if (skb_parser && skb_verdict && !psock->parser.enabled) {
+	if (stream_parser && stream_verdict && !psock->saved_data_ready) {
 		ret = sk_psock_init_strp(sk, psock);
 		if (ret)
 			goto out_unlock_drop;
-		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
-		psock_set_prog(&psock->progs.skb_parser, skb_parser);
+		psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
+		psock_set_prog(&psock->progs.stream_parser, stream_parser);
 		sk_psock_start_strp(sk, psock);
-	} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
-		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+	} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
+		psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
 		sk_psock_start_verdict(sk,psock);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
@@ -303,12 +306,12 @@ out_drop:
 out_progs:
 	if (msg_parser)
 		bpf_prog_put(msg_parser);
-out_put_skb_parser:
-	if (skb_parser)
-		bpf_prog_put(skb_parser);
-out_put_skb_verdict:
-	if (skb_verdict)
-		bpf_prog_put(skb_verdict);
+out_put_stream_parser:
+	if (stream_parser)
+		bpf_prog_put(stream_parser);
+out_put_stream_verdict:
+	if (stream_verdict)
+		bpf_prog_put(stream_verdict);
 	return ret;
 }
 
@@ -657,7 +660,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
 BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	struct sock *sk;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -667,8 +669,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
 		return SK_DROP;
 
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = sk;
+	skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
 	return SK_PASS;
 }
 
@@ -1250,7 +1251,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = {
 BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	struct sock *sk;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -1260,8 +1260,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
 	if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
 		return SK_DROP;
 
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = sk;
+	skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
 	return SK_PASS;
 }
 
@@ -1448,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-			 struct bpf_prog *old, u32 which)
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
 	struct bpf_prog **pprog;
@@ -1461,11 +1460,13 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	case BPF_SK_MSG_VERDICT:
 		pprog = &progs->msg_parser;
 		break;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		pprog = &progs->skb_parser;
+		pprog = &progs->stream_parser;
 		break;
+#endif
 	case BPF_SK_SKB_STREAM_VERDICT:
-		pprog = &progs->skb_verdict;
+		pprog = &progs->stream_verdict;
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4567de519603..d84c8a1b280e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
 
 static int two = 2;
 static int three = 3;
+static int int_3600 = 3600;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
@@ -570,6 +571,15 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "netdev_unregister_timeout_secs",
+		.data		= &netdev_unregister_timeout_secs,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &int_3600,
+	},
 	{ }
 };
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2193ae529e75..32b1bed8ae51 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -84,8 +84,7 @@
 #include <net/dn_neigh.h>
 #include <net/dn_fib.h>
 
-struct dn_rt_hash_bucket
-{
+struct dn_rt_hash_bucket {
 	struct dn_route __rcu *chain;
 	spinlock_t lock;
 };
@@ -93,7 +92,7 @@ struct dn_rt_hash_bucket
 extern struct neigh_table dn_neigh_table;
 
 
-static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00};
+static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00};
 
 static const int dn_rt_min_delay = 2 * HZ;
 static const int dn_rt_max_delay = 10 * HZ;
@@ -359,10 +358,11 @@ static void dn_run_flush(struct timer_list *unused)
 	for (i = 0; i < dn_rt_hash_mask; i++) {
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 
-		if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
+		rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL);
+		if (!rt)
 			goto nothing_to_declare;
 
-		for(; rt; rt = next) {
+		for (; rt; rt = next) {
 			next = rcu_dereference_raw(rt->dn_next);
 			RCU_INIT_POINTER(rt->dn_next, NULL);
 			dst_dev_put(&rt->dst);
@@ -425,7 +425,8 @@ static int dn_return_short(struct sk_buff *skb)
 	/* Add back headers */
 	skb_push(skb, skb->data - skb_network_header(skb));
 
-	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
 		return NET_RX_DROP;
 
 	cb = DN_SKB_CB(skb);
@@ -461,7 +462,8 @@ static int dn_return_long(struct sk_buff *skb)
 	/* Add back all headers */
 	skb_push(skb, skb->data - skb_network_header(skb));
 
-	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
 		return NET_RX_DROP;
 
 	cb = DN_SKB_CB(skb);
@@ -505,7 +507,8 @@ static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *
 	struct dn_skb_cb *cb;
 	int err;
 
-	if ((err = dn_route_input(skb)) == 0)
+	err = dn_route_input(skb);
+	if (err == 0)
 		return dst_input(skb);
 
 	cb = DN_SKB_CB(skb);
@@ -629,7 +632,8 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 	if (dn == NULL)
 		goto dump_it;
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
 		goto out;
 
 	if (!pskb_may_pull(skb, 3))
@@ -898,7 +902,7 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
 {
 	__u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
 	int match = 16;
-	while(tmp) {
+	while (tmp) {
 		tmp >>= 1;
 		match--;
 	}
@@ -1324,7 +1328,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
 
 	dev_hold(in_dev);
 
-	if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
+	dn_db = rcu_dereference(in_dev->dn_ptr);
+	if (!dn_db)
 		goto out;
 
 	/* Zero source addresses are not allowed */
@@ -1383,7 +1388,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		fld.saddr = src_map;
 	}
 
-	switch(res.type) {
+	switch (res.type) {
 	case RTN_UNICAST:
 		/*
 		 * Forwarding check here, we only check for forwarding
@@ -1407,7 +1412,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 			flags |= RTCF_DOREDIRECT;
 
 		local_src = DN_FIB_RES_PREFSRC(res);
-
+		break;
 	case RTN_BLACKHOLE:
 	case RTN_UNREACHABLE:
 		break;
@@ -1526,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb)
 		return 0;
 
 	rcu_read_lock();
-	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
+	for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
 	    rt = rcu_dereference(rt->dn_next)) {
 		if ((rt->fld.saddr == cb->src) &&
 		    (rt->fld.daddr == cb->dst) &&
@@ -1739,13 +1744,13 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
-	for(h = 0; h <= dn_rt_hash_mask; h++) {
+	for (h = 0; h <= dn_rt_hash_mask; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
 			s_idx = 0;
 		rcu_read_lock_bh();
-		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
+		for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
 			rt = rcu_dereference_bh(rt->dn_next), idx++) {
 			if (idx < s_idx)
@@ -1779,7 +1784,7 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
 	struct dn_route *rt = NULL;
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
+	for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
 		rcu_read_lock_bh();
 		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
 		if (rt)
@@ -1809,7 +1814,7 @@ static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 	struct dn_route *rt = dn_rt_cache_get_first(seq);
 
 	if (rt) {
-		while(*pos && (rt = dn_rt_cache_get_next(seq, rt)))
+		while (*pos && (rt = dn_rt_cache_get_next(seq, rt)))
 			--*pos;
 	}
 	return *pos ? NULL : rt;
@@ -1864,21 +1869,21 @@ void __init dn_route_init(void)
 
 	goal = totalram_pages() >> (26 - PAGE_SHIFT);
 
-	for(order = 0; (1UL << order) < goal; order++)
+	for (order = 0; (1UL << order) < goal; order++)
 		/* NOTHING */;
 
 	/*
 	 * Only want 1024 entries max, since the table is very, very unlikely
 	 * to be larger than that.
 	 */
-	while(order && ((((1UL << order) * PAGE_SIZE) /
+	while (order && ((((1UL << order) * PAGE_SIZE) /
 				sizeof(struct dn_rt_hash_bucket)) >= 2048))
 		order--;
 
 	do {
 		dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
 			sizeof(struct dn_rt_hash_bucket);
-		while(dn_rt_hash_mask & (dn_rt_hash_mask - 1))
+		while (dn_rt_hash_mask & (dn_rt_hash_mask - 1))
 			dn_rt_hash_mask--;
 		dn_rt_hash_table = (struct dn_rt_hash_bucket *)
 			__get_free_pages(GFP_ATOMIC, order);
@@ -1893,7 +1898,7 @@ void __init dn_route_init(void)
 		(long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
 
 	dn_rt_hash_mask--;
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 		spin_lock_init(&dn_rt_hash_table[i].lock);
 		dn_rt_hash_table[i].chain = NULL;
 	}
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 58b8fc82cd3c..8746b07668ae 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,15 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config HAVE_NET_DSA
-	def_bool y
-	depends on INET && NETDEVICES && !S390
-
-# Drivers must select NET_DSA and the appropriate tagging format
 
 menuconfig NET_DSA
 	tristate "Distributed Switch Architecture"
-	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
 	depends on HSR || HSR=n
+	depends on INET && NETDEVICES
 	select GRO_CELLS
 	select NET_SWITCHDEV
 	select PHYLINK
@@ -20,7 +15,8 @@ menuconfig NET_DSA
 
 if NET_DSA
 
-# tagging formats
+# Drivers must select the appropriate tagging format(s)
+
 config NET_DSA_TAG_8021Q
 	tristate
 	select VLAN_8021Q
@@ -48,6 +44,13 @@ config NET_DSA_TAG_BRCM
 	  Say Y if you want to enable support for tagging frames for the
 	  Broadcom switches which place the tag after the MAC source address.
 
+config NET_DSA_TAG_BRCM_LEGACY
+	tristate "Tag driver for Broadcom legacy switches using in-frame headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Broadcom legacy switches which place the tag after the MAC source
+	  address.
 
 config NET_DSA_TAG_BRCM_PREPEND
 	tristate "Tag driver for Broadcom switches using prepended headers"
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9d4b0e9b1aa1..92282de54230 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -181,12 +181,14 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
 void dsa_port_disable(struct dsa_port *dp);
-int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
+			 struct netlink_ext_ack *extack);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_lag_change(struct dsa_port *dp,
 			struct netdev_lag_lower_state_info *linfo);
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
-		      struct netdev_lag_upper_info *uinfo);
+		      struct netdev_lag_upper_info *uinfo,
+		      struct netlink_ext_ack *extack);
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct netlink_ext_ack *extack);
@@ -233,19 +235,7 @@ extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
 						 struct net_device *dev)
 {
-	/* Switchdev offloading can be configured on: */
-
-	if (dev == dp->slave)
-		/* DSA ports directly connected to a bridge, and event
-		 * was emitted for the ports themselves.
-		 */
-		return true;
-
-	if (dp->lag_dev == dev)
-		/* DSA ports connected to a bridge via a LAG */
-		return true;
-
-	return false;
+	return dsa_port_to_bridge_port(dp) == dev;
 }
 
 static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
@@ -272,6 +262,9 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
+extern struct notifier_block dsa_slave_switchdev_notifier;
+extern struct notifier_block dsa_slave_switchdev_blocking_notifier;
+
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
 int dsa_slave_create(struct dsa_port *dp);
 void dsa_slave_destroy(struct net_device *slave_dev);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c9c6d7ab3f47..01e30264b25b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -122,29 +122,132 @@ void dsa_port_disable(struct dsa_port *dp)
 	rtnl_unlock();
 }
 
-static void dsa_port_change_brport_flags(struct dsa_port *dp,
-					 bool bridge_offload)
+static int dsa_port_inherit_brport_flags(struct dsa_port *dp,
+					 struct netlink_ext_ack *extack)
 {
-	struct switchdev_brport_flags flags;
-	int flag;
+	const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+				   BR_BCAST_FLOOD;
+	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+	int flag, err;
 
-	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	if (bridge_offload)
-		flags.val = flags.mask;
-	else
-		flags.val = flags.mask & ~BR_LEARNING;
+	for_each_set_bit(flag, &mask, 32) {
+		struct switchdev_brport_flags flags = {0};
+
+		flags.mask = BIT(flag);
+
+		if (br_port_flag_is_set(brport_dev, BIT(flag)))
+			flags.val = BIT(flag);
 
-	for_each_set_bit(flag, &flags.mask, 32) {
-		struct switchdev_brport_flags tmp;
+		err = dsa_port_bridge_flags(dp, flags, extack);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
 
-		tmp.val = flags.val & BIT(flag);
-		tmp.mask = BIT(flag);
+	return 0;
+}
 
-		dsa_port_bridge_flags(dp, tmp, NULL);
+static void dsa_port_clear_brport_flags(struct dsa_port *dp)
+{
+	const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+	const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+				   BR_BCAST_FLOOD;
+	int flag, err;
+
+	for_each_set_bit(flag, &mask, 32) {
+		struct switchdev_brport_flags flags = {0};
+
+		flags.mask = BIT(flag);
+		flags.val = val & BIT(flag);
+
+		err = dsa_port_bridge_flags(dp, flags, NULL);
+		if (err && err != -EOPNOTSUPP)
+			dev_err(dp->ds->dev,
+				"failed to clear bridge port flag %lu: %pe\n",
+				flags.val, ERR_PTR(err));
 	}
 }
 
-int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+static int dsa_port_switchdev_sync(struct dsa_port *dp,
+				   struct netlink_ext_ack *extack)
+{
+	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+	struct net_device *br = dp->bridge_dev;
+	int err;
+
+	err = dsa_port_inherit_brport_flags(dp, extack);
+	if (err)
+		return err;
+
+	err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_vlan_filtering(dp, br_vlan_enabled(br), extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_mdb_replay(br, brport_dev,
+			    &dsa_slave_switchdev_blocking_notifier,
+			    extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_vlan_replay(br, brport_dev,
+			     &dsa_slave_switchdev_blocking_notifier,
+			     extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+static void dsa_port_switchdev_unsync(struct dsa_port *dp)
+{
+	/* Configure the port for standalone mode (no address learning,
+	 * flood everything).
+	 * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
+	 * when the user requests it through netlink or sysfs, but not
+	 * automatically at port join or leave, so we need to handle resetting
+	 * the brport flags ourselves. But we even prefer it that way, because
+	 * otherwise, some setups might never get the notification they need,
+	 * for example, when a port leaves a LAG that offloads the bridge,
+	 * it becomes standalone, but as far as the bridge is concerned, no
+	 * port ever left.
+	 */
+	dsa_port_clear_brport_flags(dp);
+
+	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
+	 */
+	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+
+	/* VLAN filtering is handled by dsa_switch_bridge_leave */
+
+	/* Some drivers treat the notification for having a local multicast
+	 * router by allowing multicast to be flooded to the CPU, so we should
+	 * allow this in standalone mode too.
+	 */
+	dsa_port_mrouter(dp->cpu_dp, true, NULL);
+
+	/* Ageing time may be global to the switch chip, so don't change it
+	 * here because we have no good reason (or value) to change it to.
+	 */
+}
+
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
+			 struct netlink_ext_ack *extack)
 {
 	struct dsa_notifier_bridge_info info = {
 		.tree_index = dp->ds->dst->index,
@@ -154,24 +257,25 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 	};
 	int err;
 
-	/* Notify the port driver to set its configurable flags in a way that
-	 * matches the initial settings of a bridge port.
-	 */
-	dsa_port_change_brport_flags(dp, true);
-
 	/* Here the interface is already bridged. Reflect the current
 	 * configuration so that drivers can program their chips accordingly.
 	 */
 	dp->bridge_dev = br;
 
 	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
+	if (err)
+		goto out_rollback;
 
-	/* The bridging is rolled back on error */
-	if (err) {
-		dsa_port_change_brport_flags(dp, false);
-		dp->bridge_dev = NULL;
-	}
+	err = dsa_port_switchdev_sync(dp, extack);
+	if (err)
+		goto out_rollback_unbridge;
 
+	return 0;
+
+out_rollback_unbridge:
+	dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+out_rollback:
+	dp->bridge_dev = NULL;
 	return err;
 }
 
@@ -194,23 +298,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 	if (err)
 		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
-	/* Configure the port for standalone mode (no address learning,
-	 * flood everything).
-	 * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
-	 * when the user requests it through netlink or sysfs, but not
-	 * automatically at port join or leave, so we need to handle resetting
-	 * the brport flags ourselves. But we even prefer it that way, because
-	 * otherwise, some setups might never get the notification they need,
-	 * for example, when a port leaves a LAG that offloads the bridge,
-	 * it becomes standalone, but as far as the bridge is concerned, no
-	 * port ever left.
-	 */
-	dsa_port_change_brport_flags(dp, false);
-
-	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
-	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
-	 */
-	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+	dsa_port_switchdev_unsync(dp);
 }
 
 int dsa_port_lag_change(struct dsa_port *dp,
@@ -241,7 +329,8 @@ int dsa_port_lag_change(struct dsa_port *dp,
 }
 
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
-		      struct netdev_lag_upper_info *uinfo)
+		      struct netdev_lag_upper_info *uinfo,
+		      struct netlink_ext_ack *extack)
 {
 	struct dsa_notifier_lag_info info = {
 		.sw_index = dp->ds->index,
@@ -249,17 +338,31 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
 		.lag = lag,
 		.info = uinfo,
 	};
+	struct net_device *bridge_dev;
 	int err;
 
 	dsa_lag_map(dp->ds->dst, lag);
 	dp->lag_dev = lag;
 
 	err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
-	if (err) {
-		dp->lag_dev = NULL;
-		dsa_lag_unmap(dp->ds->dst, lag);
-	}
+	if (err)
+		goto err_lag_join;
 
+	bridge_dev = netdev_master_upper_dev_get(lag);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	err = dsa_port_bridge_join(dp, bridge_dev, extack);
+	if (err)
+		goto err_bridge_join;
+
+	return 0;
+
+err_bridge_join:
+	dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
+err_lag_join:
+	dp->lag_dev = NULL;
+	dsa_lag_unmap(dp->ds->dst, lag);
 	return err;
 }
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 992fcab4b552..995e0e16f295 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1278,14 +1278,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 	}
 }
 
+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
+				    void *type_data)
+{
+	struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
+	struct net_device *master = cpu_dp->master;
+
+	if (!master->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
+}
+
 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_switch *ds = dp->ds;
 
-	if (type == TC_SETUP_BLOCK)
+	switch (type) {
+	case TC_SETUP_BLOCK:
 		return dsa_slave_setup_tc_block(dev, type_data);
+	case TC_SETUP_FT:
+		return dsa_slave_setup_ft_block(ds, dp->index, type_data);
+	default:
+		break;
+	}
 
 	if (!ds->ops->port_setup_tc)
 		return -EOPNOTSUPP;
@@ -1654,6 +1672,21 @@ static void dsa_slave_get_stats64(struct net_device *dev,
 		dev_get_tstats64(dev, s);
 }
 
+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
+				       struct net_device_path *path)
+{
+	struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+
+	path->dev = ctx->dev;
+	path->type = DEV_PATH_DSA;
+	path->dsa.proto = cpu_dp->tag_ops->proto;
+	path->dsa.port = dp->index;
+	ctx->dev = cpu_dp->master;
+
+	return 0;
+}
+
 static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_open	 	= dsa_slave_open,
 	.ndo_stop		= dsa_slave_close,
@@ -1679,6 +1712,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
 	.ndo_get_devlink_port	= dsa_slave_get_devlink_port,
 	.ndo_change_mtu		= dsa_slave_change_mtu,
+	.ndo_fill_forward_path	= dsa_slave_fill_forward_path,
 };
 
 static struct device_type dsa_type = {
@@ -1976,11 +2010,14 @@ static int dsa_slave_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct netlink_ext_ack *extack;
 	int err = NOTIFY_DONE;
 
+	extack = netdev_notifier_info_to_extack(&info->info);
+
 	if (netif_is_bridge_master(info->upper_dev)) {
 		if (info->linking) {
-			err = dsa_port_bridge_join(dp, info->upper_dev);
+			err = dsa_port_bridge_join(dp, info->upper_dev, extack);
 			if (!err)
 				dsa_bridge_mtu_normalization(dp);
 			err = notifier_from_errno(err);
@@ -1991,7 +2028,7 @@ static int dsa_slave_changeupper(struct net_device *dev,
 	} else if (netif_is_lag_master(info->upper_dev)) {
 		if (info->linking) {
 			err = dsa_port_lag_join(dp, info->upper_dev,
-						info->upper_info);
+						info->upper_info, extack);
 			if (err == -EOPNOTSUPP) {
 				NL_SET_ERR_MSG_MOD(info->info.extack,
 						   "Offloading not supported");
@@ -2389,11 +2426,11 @@ static struct notifier_block dsa_slave_nb __read_mostly = {
 	.notifier_call  = dsa_slave_netdevice_event,
 };
 
-static struct notifier_block dsa_slave_switchdev_notifier = {
+struct notifier_block dsa_slave_switchdev_notifier = {
 	.notifier_call = dsa_slave_switchdev_event,
 };
 
-static struct notifier_block dsa_slave_switchdev_blocking_notifier = {
+struct notifier_block dsa_slave_switchdev_blocking_notifier = {
 	.notifier_call = dsa_slave_switchdev_blocking_event,
 };
 
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e2577a7dcbca..40e9f3098c8d 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,9 +12,26 @@
 
 #include "dsa_priv.h"
 
-/* This tag length is 4 bytes, older ones were 6 bytes, we do not
- * handle them
- */
+/* Legacy Broadcom tag (6 bytes) */
+#define BRCM_LEG_TAG_LEN	6
+
+/* Type fields */
+/* 1st byte in the tag */
+#define BRCM_LEG_TYPE_HI	0x88
+/* 2nd byte in the tag */
+#define BRCM_LEG_TYPE_LO	0x74
+
+/* Tag fields */
+/* 3rd byte in the tag */
+#define BRCM_LEG_UNICAST	(0 << 5)
+#define BRCM_LEG_MULTICAST	(1 << 5)
+#define BRCM_LEG_EGRESS		(2 << 5)
+#define BRCM_LEG_INGRESS	(3 << 5)
+
+/* 6th byte in the tag */
+#define BRCM_LEG_PORT_ID	(0xf)
+
+/* Newer Broadcom tag (4 bytes) */
 #define BRCM_TAG_LEN	4
 
 /* Tag is constructed and desconstructed using byte by byte access
@@ -195,6 +212,87 @@ DSA_TAG_DRIVER(brcm_netdev_ops);
 MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM);
 #endif
 
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
+static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	u8 *brcm_tag;
+
+	/* The Ethernet switch we are interfaced with needs packets to be at
+	 * least 64 bytes (including FCS) otherwise they will be discarded when
+	 * they enter the switch port logic. When Broadcom tags are enabled, we
+	 * need to make sure that packets are at least 70 bytes
+	 * (including FCS and tag) because the length verification is done after
+	 * the Broadcom tag is stripped off the ingress packet.
+	 *
+	 * Let dsa_slave_xmit() free the SKB
+	 */
+	if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false))
+		return NULL;
+
+	skb_push(skb, BRCM_LEG_TAG_LEN);
+
+	memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN);
+
+	brcm_tag = skb->data + 2 * ETH_ALEN;
+
+	/* Broadcom tag type */
+	brcm_tag[0] = BRCM_LEG_TYPE_HI;
+	brcm_tag[1] = BRCM_LEG_TYPE_LO;
+
+	/* Broadcom tag value */
+	brcm_tag[2] = BRCM_LEG_EGRESS;
+	brcm_tag[3] = 0;
+	brcm_tag[4] = 0;
+	brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID;
+
+	return skb;
+}
+
+static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
+					struct net_device *dev,
+					struct packet_type *pt)
+{
+	int source_port;
+	u8 *brcm_tag;
+
+	if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+		return NULL;
+
+	brcm_tag = skb->data - 2;
+
+	source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
+
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev)
+		return NULL;
+
+	/* Remove Broadcom tag and update checksum */
+	skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
+
+	skb->offload_fwd_mark = 1;
+
+	/* Move the Ethernet DA and SA */
+	memmove(skb->data - ETH_HLEN,
+		skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN,
+		2 * ETH_ALEN);
+
+	return skb;
+}
+
+static const struct dsa_device_ops brcm_legacy_netdev_ops = {
+	.name = "brcm-legacy",
+	.proto = DSA_TAG_PROTO_BRCM_LEGACY,
+	.xmit = brcm_leg_tag_xmit,
+	.rcv = brcm_leg_tag_rcv,
+	.overhead = BRCM_LEG_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY);
+#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */
+
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
 static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 					     struct net_device *dev)
@@ -227,6 +325,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] =	{
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM)
 	&DSA_TAG_DRIVER_NAME(brcm_netdev_ops),
 #endif
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
+	&DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops),
+#endif
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
 	&DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops),
 #endif
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 59748487664f..f9b2966d1936 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -24,9 +24,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u8 xmit_tpid;
 	u8 *mtk_tag;
-	unsigned char *dest = eth_hdr(skb)->h_dest;
-	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
-				!is_broadcast_ether_addr(dest);
 
 	/* Build the special tag after the MAC Source Address. If VLAN header
 	 * is present, it's required that VLAN header and special tag is
@@ -55,10 +52,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	mtk_tag[0] = xmit_tpid;
 	mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 
-	/* Disable SA learning for multicast frames */
-	if (unlikely(is_multicast_skb))
-		mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
-
 	/* Tag control information is kept for 802.1Q */
 	if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
 		mtk_tag[2] = 0;
@@ -74,9 +67,6 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	u16 hdr;
 	int port;
 	__be16 *phdr;
-	unsigned char *dest = eth_hdr(skb)->h_dest;
-	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
-				!is_broadcast_ether_addr(dest);
 
 	if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
 		return NULL;
@@ -102,9 +92,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb->dev)
 		return NULL;
 
-	/* Only unicast or broadcast frames are offloaded */
-	if (likely(!is_multicast_skb))
-		skb->offload_fwd_mark = 1;
+	skb->offload_fwd_mark = 1;
 
 	return skb;
 }
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 743809b5806b..f9df9cac81c5 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -83,7 +83,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	struct dsa_port *dp;
 	u8 *extraction;
 	u16 vlan_tpid;
-	u64 cpuq;
 
 	/* Revert skb->data by the amount consumed by the DSA master,
 	 * so it points to the beginning of the frame.
@@ -113,7 +112,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	ocelot_xfh_get_qos_class(extraction, &qos_class);
 	ocelot_xfh_get_tag_type(extraction, &tag_type);
 	ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
-	ocelot_xfh_get_cpuq(extraction, &cpuq);
 
 	skb->dev = dsa_master_find_slave(netdev, 0, src_port);
 	if (!skb->dev)
@@ -128,12 +126,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	skb->offload_fwd_mark = 1;
 	skb->priority = qos_class;
 
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	if (eth_hdr(skb)->h_proto == cpu_to_be16(ETH_P_MRP) &&
-	    cpuq & BIT(OCELOT_MRP_CPUQ))
-		skb->offload_fwd_mark = 0;
-#endif
-
 	/* Ocelot switches copy frames unmodified to the CPU. However, it is
 	 * possible for the user to request a VLAN modification through
 	 * VCAP_IS1_ACT_VID_REPLACE_ENA. In this case, what will happen is that
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4106373180c6..933b427122be 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
 #include <net/ip.h>
 #include <net/dsa.h>
 #include <net/flow_dissector.h>
+#include <net/gro.h>
 #include <linux/uaccess.h>
 #include <net/pkt_sched.h>
 
@@ -122,7 +123,7 @@ EXPORT_SYMBOL(eth_header);
  * Make a best effort attempt to pull the length for all of the headers for
  * a given frame in a linear buffer.
  */
-u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len)
+u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len)
 {
 	const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
 	const struct ethhdr *eth = (const struct ethhdr *)data;
@@ -449,7 +450,10 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	skb_gro_pull(skb, sizeof(*eh));
 	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+					    ipv6_gro_receive, inet_gro_receive,
+					    head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -473,8 +477,9 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff)
 	rcu_read_lock();
 	ptype = gro_find_complete_by_type(type);
 	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff +
-						    sizeof(struct ethhdr));
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, nhoff + sizeof(*eh));
 
 	rcu_read_unlock();
 	return err;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 24783b71c584..0788cc3b3114 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1844,6 +1844,18 @@ out:
 	return ret;
 }
 
+__printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(*data, ETH_GSTRING_LEN, fmt, args);
+	va_end(args);
+
+	*data += ETH_GSTRING_LEN;
+}
+EXPORT_SYMBOL(ethtool_sprintf);
+
 static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_value id;
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 4cfd9e829c7b..99f3af1a9d4d 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -108,7 +108,7 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 /* hsr_debugfs_term - Tear down debugfs intrastructure
  *
  * Description:
- * When Debufs is configured this routine removes debugfs file system
+ * When Debugfs is configured this routine removes debugfs file system
  * elements that are specific to hsr
  */
 void
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5b77a46885b9..bbdd9c44f14e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
-obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
+obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 743777bce179..f09fe3a5608f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -16,6 +16,9 @@
 #include <net/route.h>
 #include <net/sock.h>
 
+#define NH_RES_DEFAULT_IDLE_TIMER	(120 * HZ)
+#define NH_RES_DEFAULT_UNBALANCED_TIMER	0	/* No forced rebalancing. */
+
 static void remove_nexthop(struct net *net, struct nexthop *nh,
 			   struct nl_info *nlinfo);
 
@@ -32,6 +35,7 @@ static const struct nla_policy rtm_nh_policy_new[] = {
 	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
 	[NHA_ENCAP]		= { .type = NLA_NESTED },
 	[NHA_FDB]		= { .type = NLA_FLAG },
+	[NHA_RES_GROUP]		= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy rtm_nh_policy_get[] = {
@@ -45,6 +49,32 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
 	[NHA_FDB]		= { .type = NLA_FLAG },
 };
 
+static const struct nla_policy rtm_nh_res_policy_new[] = {
+	[NHA_RES_GROUP_BUCKETS]			= { .type = NLA_U16 },
+	[NHA_RES_GROUP_IDLE_TIMER]		= { .type = NLA_U32 },
+	[NHA_RES_GROUP_UNBALANCED_TIMER]	= { .type = NLA_U32 },
+};
+
+static const struct nla_policy rtm_nh_policy_dump_bucket[] = {
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_OIF]		= { .type = NLA_U32 },
+	[NHA_MASTER]		= { .type = NLA_U32 },
+	[NHA_RES_BUCKET]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
+	[NHA_RES_BUCKET_NH_ID]	= { .type = NLA_U32 },
+};
+
+static const struct nla_policy rtm_nh_policy_get_bucket[] = {
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_RES_BUCKET]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy rtm_nh_res_bucket_policy_get[] = {
+	[NHA_RES_BUCKET_INDEX]	= { .type = NLA_U16 },
+};
+
 static bool nexthop_notifiers_is_empty(struct net *net)
 {
 	return !net->nexthop.notifier_chain.head;
@@ -52,10 +82,8 @@ static bool nexthop_notifiers_is_empty(struct net *net)
 
 static void
 __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
-			       const struct nexthop *nh)
+			       const struct nh_info *nhi)
 {
-	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
-
 	nh_info->dev = nhi->fib_nhc.nhc_dev;
 	nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
 	if (nh_info->gw_family == AF_INET)
@@ -71,12 +99,14 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
 static int nh_notifier_single_info_init(struct nh_notifier_info *info,
 					const struct nexthop *nh)
 {
+	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
 	info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
 	info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
 	if (!info->nh)
 		return -ENOMEM;
 
-	__nh_notifier_single_info_init(info->nh, nh);
+	__nh_notifier_single_info_init(info->nh, nhi);
 
 	return 0;
 }
@@ -103,11 +133,44 @@ static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
 
 	for (i = 0; i < num_nh; i++) {
 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		struct nh_info *nhi;
 
+		nhi = rtnl_dereference(nhge->nh->nh_info);
 		info->nh_grp->nh_entries[i].id = nhge->nh->id;
 		info->nh_grp->nh_entries[i].weight = nhge->weight;
 		__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
-					       nhge->nh);
+					       nhi);
+	}
+
+	return 0;
+}
+
+static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
+					   struct nh_group *nhg)
+{
+	struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+	u16 num_nh_buckets = res_table->num_nh_buckets;
+	unsigned long size;
+	u16 i;
+
+	info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE;
+	size = struct_size(info->nh_res_table, nhs, num_nh_buckets);
+	info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO |
+				       __GFP_NOWARN);
+	if (!info->nh_res_table)
+		return -ENOMEM;
+
+	info->nh_res_table->num_nh_buckets = num_nh_buckets;
+
+	for (i = 0; i < num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+		struct nh_info *nhi;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		nhi = rtnl_dereference(nhge->nh->nh_info);
+		__nh_notifier_single_info_init(&info->nh_res_table->nhs[i],
+					       nhi);
 	}
 
 	return 0;
@@ -120,6 +183,8 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
 
 	if (nhg->mpath)
 		return nh_notifier_mp_info_init(info, nhg);
+	else if (nhg->resilient)
+		return nh_notifier_res_table_info_init(info, nhg);
 	return -EINVAL;
 }
 
@@ -130,6 +195,8 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
 
 	if (nhg->mpath)
 		kfree(info->nh_grp);
+	else if (nhg->resilient)
+		vfree(info->nh_res_table);
 }
 
 static int nh_notifier_info_init(struct nh_notifier_info *info,
@@ -181,6 +248,178 @@ static int call_nexthop_notifiers(struct net *net,
 	return notifier_to_errno(err);
 }
 
+static int
+nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info,
+				      bool force, unsigned int *p_idle_timer_ms)
+{
+	struct nh_res_table *res_table;
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	int err = 0;
+
+	/* When 'force' is false, nexthop bucket replacement is performed
+	 * because the bucket was deemed to be idle. In this case, capable
+	 * listeners can choose to perform an atomic replacement: The bucket is
+	 * only replaced if it is inactive. However, if the idle timer interval
+	 * is smaller than the interval in which a listener is querying
+	 * buckets' activity from the device, then atomic replacement should
+	 * not be tried. Pass the idle timer value to listeners, so that they
+	 * could determine which type of replacement to perform.
+	 */
+	if (force) {
+		*p_idle_timer_ms = 0;
+		return 0;
+	}
+
+	rcu_read_lock();
+
+	nh = nexthop_find_by_id(info->net, info->id);
+	if (!nh) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	nhg = rcu_dereference(nh->nh_grp);
+	res_table = rcu_dereference(nhg->res_table);
+	*p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer);
+
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info,
+					    u16 bucket_index, bool force,
+					    struct nh_info *oldi,
+					    struct nh_info *newi)
+{
+	unsigned int idle_timer_ms;
+	int err;
+
+	err = nh_notifier_res_bucket_idle_timer_get(info, force,
+						    &idle_timer_ms);
+	if (err)
+		return err;
+
+	info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET;
+	info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket),
+				      GFP_KERNEL);
+	if (!info->nh_res_bucket)
+		return -ENOMEM;
+
+	info->nh_res_bucket->bucket_index = bucket_index;
+	info->nh_res_bucket->idle_timer_ms = idle_timer_ms;
+	info->nh_res_bucket->force = force;
+	__nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi);
+	__nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi);
+	return 0;
+}
+
+static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info)
+{
+	kfree(info->nh_res_bucket);
+}
+
+static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+					       u16 bucket_index, bool force,
+					       struct nh_info *oldi,
+					       struct nh_info *newi,
+					       struct netlink_ext_ack *extack)
+{
+	struct nh_notifier_info info = {
+		.net = net,
+		.extack = extack,
+		.id = nhg_id,
+	};
+	int err;
+
+	if (nexthop_notifiers_is_empty(net))
+		return 0;
+
+	err = nh_notifier_res_bucket_info_init(&info, bucket_index, force,
+					       oldi, newi);
+	if (err)
+		return err;
+
+	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+					   NEXTHOP_EVENT_BUCKET_REPLACE, &info);
+	nh_notifier_res_bucket_info_fini(&info);
+
+	return notifier_to_errno(err);
+}
+
+/* There are three users of RES_TABLE, and NHs etc. referenced from there:
+ *
+ * 1) a collection of callbacks for NH maintenance. This operates under
+ *    RTNL,
+ * 2) the delayed work that gradually balances the resilient table,
+ * 3) and nexthop_select_path(), operating under RCU.
+ *
+ * Both the delayed work and the RTNL block are writers, and need to
+ * maintain mutual exclusion. Since there are only two and well-known
+ * writers for each table, the RTNL code can make sure it has exclusive
+ * access thus:
+ *
+ * - Have the DW operate without locking;
+ * - synchronously cancel the DW;
+ * - do the writing;
+ * - if the write was not actually a delete, call upkeep, which schedules
+ *   DW again if necessary.
+ *
+ * The functions that are always called from the RTNL context use
+ * rtnl_dereference(). The functions that can also be called from the DW do
+ * a raw dereference and rely on the above mutual exclusion scheme.
+ */
+#define nh_res_dereference(p) (rcu_dereference_raw(p))
+
+static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+					     u16 bucket_index, bool force,
+					     struct nexthop *old_nh,
+					     struct nexthop *new_nh,
+					     struct netlink_ext_ack *extack)
+{
+	struct nh_info *oldi = nh_res_dereference(old_nh->nh_info);
+	struct nh_info *newi = nh_res_dereference(new_nh->nh_info);
+
+	return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index,
+						   force, oldi, newi, extack);
+}
+
+static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
+					    struct netlink_ext_ack *extack)
+{
+	struct nh_notifier_info info = {
+		.net = net,
+		.extack = extack,
+	};
+	struct nh_group *nhg;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (nexthop_notifiers_is_empty(net))
+		return 0;
+
+	/* At this point, the nexthop buckets are still not populated. Only
+	 * emit a notification with the logical nexthops, so that a listener
+	 * could potentially veto it in case of unsupported configuration.
+	 */
+	nhg = rtnl_dereference(nh->nh_grp);
+	err = nh_notifier_mp_info_init(&info, nhg);
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
+		return err;
+	}
+
+	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+					   NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+					   &info);
+	kfree(info.nh_grp);
+
+	return notifier_to_errno(err);
+}
+
 static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
 				 enum nexthop_event_type event_type,
 				 struct nexthop *nh,
@@ -239,6 +478,9 @@ static void nexthop_free_group(struct nexthop *nh)
 
 	WARN_ON(nhg->spare == nhg);
 
+	if (nhg->resilient)
+		vfree(rcu_dereference_raw(nhg->res_table));
+
 	kfree(nhg->spare);
 	kfree(nhg);
 }
@@ -297,6 +539,30 @@ static struct nh_group *nexthop_grp_alloc(u16 num_nh)
 	return nhg;
 }
 
+static void nh_res_table_upkeep_dw(struct work_struct *work);
+
+static struct nh_res_table *
+nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg)
+{
+	const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets;
+	struct nh_res_table *res_table;
+	unsigned long size;
+
+	size = struct_size(res_table, nh_buckets, num_nh_buckets);
+	res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
+	if (!res_table)
+		return NULL;
+
+	res_table->net = net;
+	res_table->nhg_id = nhg_id;
+	INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw);
+	INIT_LIST_HEAD(&res_table->uw_nh_entries);
+	res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+	res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer;
+	res_table->num_nh_buckets = num_nh_buckets;
+	return res_table;
+}
+
 static void nh_base_seq_inc(struct net *net)
 {
 	while (++net->nexthop.seq == 0)
@@ -345,6 +611,48 @@ static u32 nh_find_unused_id(struct net *net)
 	return 0;
 }
 
+static void nh_res_time_set_deadline(unsigned long next_time,
+				     unsigned long *deadline)
+{
+	if (time_before(next_time, *deadline))
+		*deadline = next_time;
+}
+
+static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table)
+{
+	if (list_empty(&res_table->uw_nh_entries))
+		return 0;
+	return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since);
+}
+
+static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg)
+{
+	struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NHA_RES_GROUP);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS,
+			res_table->num_nh_buckets) ||
+	    nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER,
+			jiffies_to_clock_t(res_table->idle_timer)) ||
+	    nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER,
+			jiffies_to_clock_t(res_table->unbalanced_timer)) ||
+	    nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME,
+			      nh_res_table_unbalanced_time(res_table),
+			      NHA_RES_GROUP_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 {
 	struct nexthop_grp *p;
@@ -355,6 +663,8 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 
 	if (nhg->mpath)
 		group_type = NEXTHOP_GRP_TYPE_MPATH;
+	else if (nhg->resilient)
+		group_type = NEXTHOP_GRP_TYPE_RES;
 
 	if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
 		goto nla_put_failure;
@@ -370,6 +680,9 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 		p += 1;
 	}
 
+	if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -457,13 +770,26 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg)
+{
+	return nla_total_size(0) +	/* NHA_RES_GROUP */
+		nla_total_size(2) +	/* NHA_RES_GROUP_BUCKETS */
+		nla_total_size(4) +	/* NHA_RES_GROUP_IDLE_TIMER */
+		nla_total_size(4) +	/* NHA_RES_GROUP_UNBALANCED_TIMER */
+		nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */
+}
+
 static size_t nh_nlmsg_size_grp(struct nexthop *nh)
 {
 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 	size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+	size_t tot = nla_total_size(sz) +
+		nla_total_size(2); /* NHA_GROUP_TYPE */
+
+	if (nhg->resilient)
+		tot += nh_nlmsg_size_grp_res(nhg);
 
-	return nla_total_size(sz) +
-	       nla_total_size(2);  /* NHA_GROUP_TYPE */
+	return tot;
 }
 
 static size_t nh_nlmsg_size_single(struct nexthop *nh)
@@ -538,20 +864,144 @@ errout:
 		rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
 }
 
+static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
+{
+	return (unsigned long)atomic_long_read(&bucket->used_time);
+}
+
+static unsigned long
+nh_res_bucket_idle_point(const struct nh_res_table *res_table,
+			 const struct nh_res_bucket *bucket,
+			 unsigned long now)
+{
+	unsigned long time = nh_res_bucket_used_time(bucket);
+
+	/* Bucket was not used since it was migrated. The idle time is now. */
+	if (time == bucket->migrated_time)
+		return now;
+
+	return time + res_table->idle_timer;
+}
+
+static unsigned long
+nh_res_table_unb_point(const struct nh_res_table *res_table)
+{
+	return res_table->unbalanced_since + res_table->unbalanced_timer;
+}
+
+static void nh_res_bucket_set_idle(const struct nh_res_table *res_table,
+				   struct nh_res_bucket *bucket)
+{
+	unsigned long now = jiffies;
+
+	atomic_long_set(&bucket->used_time, (long)now);
+	bucket->migrated_time = now;
+}
+
+static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
+{
+	atomic_long_set(&bucket->used_time, (long)jiffies);
+}
+
+static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket)
+{
+	unsigned long used_time = nh_res_bucket_used_time(bucket);
+
+	return jiffies_delta_to_clock_t(jiffies - used_time);
+}
+
+static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh,
+			      struct nh_res_bucket *bucket, u16 bucket_index,
+			      int event, u32 portid, u32 seq,
+			      unsigned int nlflags,
+			      struct netlink_ext_ack *extack)
+{
+	struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	struct nhmsg *nhm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	nhm = nlmsg_data(nlh);
+	nhm->nh_family = AF_UNSPEC;
+	nhm->nh_flags = bucket->nh_flags;
+	nhm->nh_protocol = nh->protocol;
+	nhm->nh_scope = 0;
+	nhm->resvd = 0;
+
+	if (nla_put_u32(skb, NHA_ID, nh->id))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NHA_RES_BUCKET);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) ||
+	    nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) ||
+	    nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME,
+			      nh_res_bucket_idle_time(bucket),
+			      NHA_RES_BUCKET_PAD))
+		goto nla_put_failure_nest;
+
+	nla_nest_end(skb, nest);
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure_nest:
+	nla_nest_cancel(skb, nest);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static void nexthop_bucket_notify(struct nh_res_table *res_table,
+				  u16 bucket_index)
+{
+	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+	struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+	struct nexthop *nh = nhge->nh_parent;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto errout;
+
+	err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+				 RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE,
+				 NULL);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+}
+
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
 	if (nh->is_group) {
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
-		/* nested multipath (group within a group) is not
-		 * supported
-		 */
+		/* Nesting groups within groups is not supported. */
 		if (nhg->mpath) {
 			NL_SET_ERR_MSG(extack,
 				       "Multipath group can not be a nexthop within a group");
 			return false;
 		}
+		if (nhg->resilient) {
+			NL_SET_ERR_MSG(extack,
+				       "Resilient group can not be a nexthop within a group");
+			return false;
+		}
 		*is_fdb = nhg->fdb_nh;
 	} else {
 		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
@@ -591,7 +1041,7 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
 
 static int nh_check_attr_group(struct net *net,
 			       struct nlattr *tb[], size_t tb_size,
-			       struct netlink_ext_ack *extack)
+			       u16 nh_grp_type, struct netlink_ext_ack *extack)
 {
 	unsigned int len = nla_len(tb[NHA_GROUP]);
 	u8 nh_family = AF_UNSPEC;
@@ -652,8 +1102,14 @@ static int nh_check_attr_group(struct net *net,
 	for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
 		if (!tb[i])
 			continue;
-		if (i == NHA_FDB)
+		switch (i) {
+		case NHA_FDB:
 			continue;
+		case NHA_RES_GROUP:
+			if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+				continue;
+			break;
+		}
 		NL_SET_ERR_MSG(extack,
 			       "No other attributes can be set in nexthop groups");
 		return -EINVAL;
@@ -732,6 +1188,22 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
 	return rc;
 }
 
+static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
+{
+	struct nh_res_table *res_table = rcu_dereference(nhg->res_table);
+	u16 bucket_index = hash % res_table->num_nh_buckets;
+	struct nh_res_bucket *bucket;
+	struct nh_grp_entry *nhge;
+
+	/* nexthop_select_path() is expected to return a non-NULL value, so
+	 * skip protocol validation and just hand out whatever there is.
+	 */
+	bucket = &res_table->nh_buckets[bucket_index];
+	nh_res_bucket_set_busy(bucket);
+	nhge = rcu_dereference(bucket->nh_entry);
+	return nhge->nh;
+}
+
 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 {
 	struct nh_group *nhg;
@@ -742,6 +1214,8 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 	nhg = rcu_dereference(nh->nh_grp);
 	if (nhg->mpath)
 		return nexthop_select_path_mp(nhg, hash);
+	else if (nhg->resilient)
+		return nexthop_select_path_res(nhg, hash);
 
 	/* Unreachable. */
 	return NULL;
@@ -924,7 +1398,319 @@ static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
 	return 0;
 }
 
-static void nh_group_rebalance(struct nh_group *nhg)
+static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets == nhge->res.wants_buckets;
+}
+
+static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets > nhge->res.wants_buckets;
+}
+
+static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets < nhge->res.wants_buckets;
+}
+
+static bool nh_res_table_is_balanced(const struct nh_res_table *res_table)
+{
+	return list_empty(&res_table->uw_nh_entries);
+}
+
+static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket)
+{
+	struct nh_grp_entry *nhge;
+
+	if (bucket->occupied) {
+		nhge = nh_res_dereference(bucket->nh_entry);
+		nhge->res.count_buckets--;
+		bucket->occupied = false;
+	}
+}
+
+static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket,
+				 struct nh_grp_entry *nhge)
+{
+	nh_res_bucket_unset_nh(bucket);
+
+	bucket->occupied = true;
+	rcu_assign_pointer(bucket->nh_entry, nhge);
+	nhge->res.count_buckets++;
+}
+
+static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
+					 struct nh_res_bucket *bucket,
+					 unsigned long *deadline, bool *force)
+{
+	unsigned long now = jiffies;
+	struct nh_grp_entry *nhge;
+	unsigned long idle_point;
+
+	if (!bucket->occupied) {
+		/* The bucket is not occupied, its NHGE pointer is either
+		 * NULL or obsolete. We _have to_ migrate: set force.
+		 */
+		*force = true;
+		return true;
+	}
+
+	nhge = nh_res_dereference(bucket->nh_entry);
+
+	/* If the bucket is populated by an underweight or balanced
+	 * nexthop, do not migrate.
+	 */
+	if (!nh_res_nhge_is_ow(nhge))
+		return false;
+
+	/* At this point we know that the bucket is populated with an
+	 * overweight nexthop. It needs to be migrated to a new nexthop if
+	 * the idle timer of unbalanced timer expired.
+	 */
+
+	idle_point = nh_res_bucket_idle_point(res_table, bucket, now);
+	if (time_after_eq(now, idle_point)) {
+		/* The bucket is idle. We _can_ migrate: unset force. */
+		*force = false;
+		return true;
+	}
+
+	/* Unbalanced timer of 0 means "never force". */
+	if (res_table->unbalanced_timer) {
+		unsigned long unb_point;
+
+		unb_point = nh_res_table_unb_point(res_table);
+		if (time_after(now, unb_point)) {
+			/* The bucket is not idle, but the unbalanced timer
+			 * expired. We _can_ migrate, but set force anyway,
+			 * so that drivers know to ignore activity reports
+			 * from the HW.
+			 */
+			*force = true;
+			return true;
+		}
+
+		nh_res_time_set_deadline(unb_point, deadline);
+	}
+
+	nh_res_time_set_deadline(idle_point, deadline);
+	return false;
+}
+
+static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
+				  u16 bucket_index, bool notify,
+				  bool notify_nl, bool force)
+{
+	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+	struct nh_grp_entry *new_nhge;
+	struct netlink_ext_ack extack;
+	int err;
+
+	new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
+					    struct nh_grp_entry,
+					    res.uw_nh_entry);
+	if (WARN_ON_ONCE(!new_nhge))
+		/* If this function is called, "bucket" is either not
+		 * occupied, or it belongs to a next hop that is
+		 * overweight. In either case, there ought to be a
+		 * corresponding underweight next hop.
+		 */
+		return false;
+
+	if (notify) {
+		struct nh_grp_entry *old_nhge;
+
+		old_nhge = nh_res_dereference(bucket->nh_entry);
+		err = call_nexthop_res_bucket_notifiers(res_table->net,
+							res_table->nhg_id,
+							bucket_index, force,
+							old_nhge->nh,
+							new_nhge->nh, &extack);
+		if (err) {
+			pr_err_ratelimited("%s\n", extack._msg);
+			if (!force)
+				return false;
+			/* It is not possible to veto a forced replacement, so
+			 * just clear the hardware flags from the nexthop
+			 * bucket to indicate to user space that this bucket is
+			 * not correctly populated in hardware.
+			 */
+			bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+		}
+	}
+
+	nh_res_bucket_set_nh(bucket, new_nhge);
+	nh_res_bucket_set_idle(res_table, bucket);
+
+	if (notify_nl)
+		nexthop_bucket_notify(res_table, bucket_index);
+
+	if (nh_res_nhge_is_balanced(new_nhge))
+		list_del(&new_nhge->res.uw_nh_entry);
+	return true;
+}
+
+#define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
+
+static void nh_res_table_upkeep(struct nh_res_table *res_table,
+				bool notify, bool notify_nl)
+{
+	unsigned long now = jiffies;
+	unsigned long deadline;
+	u16 i;
+
+	/* Deadline is the next time that upkeep should be run. It is the
+	 * earliest time at which one of the buckets might be migrated.
+	 * Start at the most pessimistic estimate: either unbalanced_timer
+	 * from now, or if there is none, idle_timer from now. For each
+	 * encountered time point, call nh_res_time_set_deadline() to
+	 * refine the estimate.
+	 */
+	if (res_table->unbalanced_timer)
+		deadline = now + res_table->unbalanced_timer;
+	else
+		deadline = now + res_table->idle_timer;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		bool force;
+
+		if (nh_res_bucket_should_migrate(res_table, bucket,
+						 &deadline, &force)) {
+			if (!nh_res_bucket_migrate(res_table, i, notify,
+						   notify_nl, force)) {
+				unsigned long idle_point;
+
+				/* A driver can override the migration
+				 * decision if the HW reports that the
+				 * bucket is actually not idle. Therefore
+				 * remark the bucket as busy again and
+				 * update the deadline.
+				 */
+				nh_res_bucket_set_busy(bucket);
+				idle_point = nh_res_bucket_idle_point(res_table,
+								      bucket,
+								      now);
+				nh_res_time_set_deadline(idle_point, &deadline);
+			}
+		}
+	}
+
+	/* If the group is still unbalanced, schedule the next upkeep to
+	 * either the deadline computed above, or the minimum deadline,
+	 * whichever comes later.
+	 */
+	if (!nh_res_table_is_balanced(res_table)) {
+		unsigned long now = jiffies;
+		unsigned long min_deadline;
+
+		min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL;
+		if (time_before(deadline, min_deadline))
+			deadline = min_deadline;
+
+		queue_delayed_work(system_power_efficient_wq,
+				   &res_table->upkeep_dw, deadline - now);
+	}
+}
+
+static void nh_res_table_upkeep_dw(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct nh_res_table *res_table;
+
+	res_table = container_of(dw, struct nh_res_table, upkeep_dw);
+	nh_res_table_upkeep(res_table, true, true);
+}
+
+static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
+{
+	cancel_delayed_work_sync(&res_table->upkeep_dw);
+}
+
+static void nh_res_group_rebalance(struct nh_group *nhg,
+				   struct nh_res_table *res_table)
+{
+	int prev_upper_bound = 0;
+	int total = 0;
+	int w = 0;
+	int i;
+
+	INIT_LIST_HEAD(&res_table->uw_nh_entries);
+
+	for (i = 0; i < nhg->num_nh; ++i)
+		total += nhg->nh_entries[i].weight;
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		int upper_bound;
+
+		w += nhge->weight;
+		upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
+						total);
+		nhge->res.wants_buckets = upper_bound - prev_upper_bound;
+		prev_upper_bound = upper_bound;
+
+		if (nh_res_nhge_is_uw(nhge)) {
+			if (list_empty(&res_table->uw_nh_entries))
+				res_table->unbalanced_since = jiffies;
+			list_add(&nhge->res.uw_nh_entry,
+				 &res_table->uw_nh_entries);
+		}
+	}
+}
+
+/* Migrate buckets in res_table so that they reference NHGE's from NHG with
+ * the right NH ID. Set those buckets that do not have a corresponding NHGE
+ * entry in NHG as not occupied.
+ */
+static void nh_res_table_migrate_buckets(struct nh_res_table *res_table,
+					 struct nh_group *nhg)
+{
+	u16 i;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		u32 id = rtnl_dereference(bucket->nh_entry)->nh->id;
+		bool found = false;
+		int j;
+
+		for (j = 0; j < nhg->num_nh; j++) {
+			struct nh_grp_entry *nhge = &nhg->nh_entries[j];
+
+			if (nhge->nh->id == id) {
+				nh_res_bucket_set_nh(bucket, nhge);
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			nh_res_bucket_unset_nh(bucket);
+	}
+}
+
+static void replace_nexthop_grp_res(struct nh_group *oldg,
+				    struct nh_group *newg)
+{
+	/* For NH group replacement, the new NHG might only have a stub
+	 * hash table with 0 buckets, because the number of buckets was not
+	 * specified. For NH removal, oldg and newg both reference the same
+	 * res_table. So in any case, in the following, we want to work
+	 * with oldg->res_table.
+	 */
+	struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table);
+	unsigned long prev_unbalanced_since = old_res_table->unbalanced_since;
+	bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries);
+
+	nh_res_table_cancel_upkeep(old_res_table);
+	nh_res_table_migrate_buckets(old_res_table, newg);
+	nh_res_group_rebalance(newg, old_res_table);
+	if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
+		old_res_table->unbalanced_since = prev_unbalanced_since;
+	nh_res_table_upkeep(old_res_table, true, false);
+}
+
+static void nh_mp_group_rebalance(struct nh_group *nhg)
 {
 	int total = 0;
 	int w = 0;
@@ -965,7 +1751,9 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 	}
 
 	newg->has_v4 = false;
+	newg->is_multipath = nhg->is_multipath;
 	newg->mpath = nhg->mpath;
+	newg->resilient = nhg->resilient;
 	newg->fdb_nh = nhg->fdb_nh;
 	newg->num_nh = nhg->num_nh;
 
@@ -993,15 +1781,25 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 		j++;
 	}
 
-	nh_group_rebalance(newg);
+	if (newg->mpath)
+		nh_mp_group_rebalance(newg);
+	else if (newg->resilient)
+		replace_nexthop_grp_res(nhg, newg);
+
 	rcu_assign_pointer(nhp->nh_grp, newg);
 
 	list_del(&nhge->nh_list);
 	nexthop_put(nhge->nh);
 
-	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
-	if (err)
-		pr_err("%s\n", extack._msg);
+	/* Removal of a NH from a resilient group is notified through
+	 * bucket notifications.
+	 */
+	if (newg->mpath) {
+		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
+					     &extack);
+		if (err)
+			pr_err("%s\n", extack._msg);
+	}
 
 	if (nlinfo)
 		nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
@@ -1022,6 +1820,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
 {
 	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+	struct nh_res_table *res_table;
 	int i, num_nh = nhg->num_nh;
 
 	for (i = 0; i < num_nh; ++i) {
@@ -1032,6 +1831,11 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
 
 		list_del_init(&nhge->nh_list);
 	}
+
+	if (nhg->resilient) {
+		res_table = rtnl_dereference(nhg->res_table);
+		nh_res_table_cancel_upkeep(res_table);
+	}
 }
 
 /* not called for nexthop replace */
@@ -1107,9 +1911,12 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
 }
 
 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
-			       struct nexthop *new,
+			       struct nexthop *new, const struct nh_config *cfg,
 			       struct netlink_ext_ack *extack)
 {
+	struct nh_res_table *tmp_table = NULL;
+	struct nh_res_table *new_res_table;
+	struct nh_res_table *old_res_table;
 	struct nh_group *oldg, *newg;
 	int i, err;
 
@@ -1118,19 +1925,67 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 		return -EINVAL;
 	}
 
-	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
-	if (err)
-		return err;
-
 	oldg = rtnl_dereference(old->nh_grp);
 	newg = rtnl_dereference(new->nh_grp);
 
+	if (newg->mpath != oldg->mpath) {
+		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
+		return -EINVAL;
+	}
+
+	if (newg->mpath) {
+		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
+					     extack);
+		if (err)
+			return err;
+	} else if (newg->resilient) {
+		new_res_table = rtnl_dereference(newg->res_table);
+		old_res_table = rtnl_dereference(oldg->res_table);
+
+		/* Accept if num_nh_buckets was not given, but if it was
+		 * given, demand that the value be correct.
+		 */
+		if (cfg->nh_grp_res_has_num_buckets &&
+		    cfg->nh_grp_res_num_buckets !=
+		    old_res_table->num_nh_buckets) {
+			NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group.");
+			return -EINVAL;
+		}
+
+		/* Emit a pre-replace notification so that listeners could veto
+		 * a potentially unsupported configuration. Otherwise,
+		 * individual bucket replacement notifications would need to be
+		 * vetoed, which is something that should only happen if the
+		 * bucket is currently active.
+		 */
+		err = call_nexthop_res_table_notifiers(net, new, extack);
+		if (err)
+			return err;
+
+		if (cfg->nh_grp_res_has_idle_timer)
+			old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+		if (cfg->nh_grp_res_has_unbalanced_timer)
+			old_res_table->unbalanced_timer =
+				cfg->nh_grp_res_unbalanced_timer;
+
+		replace_nexthop_grp_res(oldg, newg);
+
+		tmp_table = new_res_table;
+		rcu_assign_pointer(newg->res_table, old_res_table);
+		rcu_assign_pointer(newg->spare->res_table, old_res_table);
+	}
+
 	/* update parents - used by nexthop code for cleanup */
 	for (i = 0; i < newg->num_nh; i++)
 		newg->nh_entries[i].nh_parent = old;
 
 	rcu_assign_pointer(old->nh_grp, newg);
 
+	if (newg->resilient) {
+		rcu_assign_pointer(oldg->res_table, tmp_table);
+		rcu_assign_pointer(oldg->spare->res_table, tmp_table);
+	}
+
 	for (i = 0; i < oldg->num_nh; i++)
 		oldg->nh_entries[i].nh_parent = new;
 
@@ -1156,6 +2011,71 @@ static void nh_group_v4_update(struct nh_group *nhg)
 	nhg->has_v4 = has_v4;
 }
 
+static int replace_nexthop_single_notify_res(struct net *net,
+					     struct nh_res_table *res_table,
+					     struct nexthop *old,
+					     struct nh_info *oldi,
+					     struct nh_info *newi,
+					     struct netlink_ext_ack *extack)
+{
+	u32 nhg_id = res_table->nhg_id;
+	int err;
+	u16 i;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nhge->nh == old) {
+			err = __call_nexthop_res_bucket_notifiers(net, nhg_id,
+								  i, true,
+								  oldi, newi,
+								  extack);
+			if (err)
+				goto err_notify;
+		}
+	}
+
+	return 0;
+
+err_notify:
+	while (i-- > 0) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nhge->nh == old)
+			__call_nexthop_res_bucket_notifiers(net, nhg_id, i,
+							    true, newi, oldi,
+							    extack);
+	}
+	return err;
+}
+
+static int replace_nexthop_single_notify(struct net *net,
+					 struct nexthop *group_nh,
+					 struct nexthop *old,
+					 struct nh_info *oldi,
+					 struct nh_info *newi,
+					 struct netlink_ext_ack *extack)
+{
+	struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
+	struct nh_res_table *res_table;
+
+	if (nhg->mpath) {
+		return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
+					      group_nh, extack);
+	} else if (nhg->resilient) {
+		res_table = rtnl_dereference(nhg->res_table);
+		return replace_nexthop_single_notify_res(net, res_table,
+							 old, oldi, newi,
+							 extack);
+	}
+
+	return -EINVAL;
+}
+
 static int replace_nexthop_single(struct net *net, struct nexthop *old,
 				  struct nexthop *new,
 				  struct netlink_ext_ack *extack)
@@ -1198,8 +2118,8 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
 	list_for_each_entry(nhge, &old->grp_list, nh_list) {
 		struct nexthop *nhp = nhge->nh_parent;
 
-		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
-					     extack);
+		err = replace_nexthop_single_notify(net, nhp, old, oldi, newi,
+						    extack);
 		if (err)
 			goto err_notify;
 	}
@@ -1229,7 +2149,7 @@ err_notify:
 	list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
 		struct nexthop *nhp = nhge->nh_parent;
 
-		call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
+		replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL);
 	}
 	call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
 	return err;
@@ -1276,7 +2196,8 @@ static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
 }
 
 static int replace_nexthop(struct net *net, struct nexthop *old,
-			   struct nexthop *new, struct netlink_ext_ack *extack)
+			   struct nexthop *new, const struct nh_config *cfg,
+			   struct netlink_ext_ack *extack)
 {
 	bool new_is_reject = false;
 	struct nh_grp_entry *nhge;
@@ -1319,7 +2240,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
 	}
 
 	if (old->is_group)
-		err = replace_nexthop_grp(net, old, new, extack);
+		err = replace_nexthop_grp(net, old, new, cfg, extack);
 	else
 		err = replace_nexthop_single(net, old, new, extack);
 
@@ -1361,7 +2282,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 		} else if (new_id > nh->id) {
 			pp = &next->rb_right;
 		} else if (replace) {
-			rc = replace_nexthop(net, nh, new_nh, extack);
+			rc = replace_nexthop(net, nh, new_nh, cfg, extack);
 			if (!rc) {
 				new_nh = nh; /* send notification with old nh */
 				replace_notify = 1;
@@ -1379,9 +2300,37 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 		goto out;
 	}
 
+	if (new_nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp);
+		struct nh_res_table *res_table;
+
+		if (nhg->resilient) {
+			res_table = rtnl_dereference(nhg->res_table);
+
+			/* Not passing the number of buckets is OK when
+			 * replacing, but not when creating a new group.
+			 */
+			if (!cfg->nh_grp_res_has_num_buckets) {
+				NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion");
+				rc = -EINVAL;
+				goto out;
+			}
+
+			nh_res_group_rebalance(nhg, res_table);
+
+			/* Do not send bucket notifications, we do full
+			 * notification below.
+			 */
+			nh_res_table_upkeep(res_table, false, false);
+		}
+	}
+
 	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
 	rb_insert_color(&new_nh->rb_node, root);
 
+	/* The initial insertion is a full notification for mpath as well
+	 * as resilient groups.
+	 */
 	rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
 	if (rc)
 		rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
@@ -1441,6 +2390,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
 	struct nh_group *nhg;
 	struct nexthop *nh;
+	int err;
 	int i;
 
 	if (WARN_ON(!num_nh))
@@ -1472,8 +2422,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		struct nh_info *nhi;
 
 		nhe = nexthop_find_by_id(net, entry[i].id);
-		if (!nexthop_get(nhe))
+		if (!nexthop_get(nhe)) {
+			err = -ENOENT;
 			goto out_no_nh;
+		}
 
 		nhi = rtnl_dereference(nhe->nh_info);
 		if (nhi->family == AF_INET)
@@ -1485,13 +2437,28 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		nhg->nh_entries[i].nh_parent = nh;
 	}
 
-	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
+	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
 		nhg->mpath = 1;
+		nhg->is_multipath = true;
+	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
+		struct nh_res_table *res_table;
+
+		res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
+		if (!res_table) {
+			err = -ENOMEM;
+			goto out_no_nh;
+		}
+
+		rcu_assign_pointer(nhg->spare->res_table, res_table);
+		rcu_assign_pointer(nhg->res_table, res_table);
+		nhg->resilient = true;
+		nhg->is_multipath = true;
+	}
 
-	WARN_ON_ONCE(nhg->mpath != 1);
+	WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1);
 
 	if (nhg->mpath)
-		nh_group_rebalance(nhg);
+		nh_mp_group_rebalance(nhg);
 
 	if (cfg->nh_fdb)
 		nhg->fdb_nh = 1;
@@ -1510,7 +2477,7 @@ out_no_nh:
 	kfree(nhg);
 	kfree(nh);
 
-	return ERR_PTR(-ENOENT);
+	return ERR_PTR(err);
 }
 
 static int nh_create_ipv4(struct net *net, struct nexthop *nh,
@@ -1680,6 +2647,70 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
 	return nh;
 }
 
+static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback,
+			    unsigned long *timer_p, bool *has_p,
+			    struct netlink_ext_ack *extack)
+{
+	unsigned long timer;
+	u32 value;
+
+	if (!attr) {
+		*timer_p = fallback;
+		*has_p = false;
+		return 0;
+	}
+
+	value = nla_get_u32(attr);
+	timer = clock_t_to_jiffies(value);
+	if (timer == ~0UL) {
+		NL_SET_ERR_MSG(extack, "Timer value too large");
+		return -EINVAL;
+	}
+
+	*timer_p = timer;
+	*has_p = true;
+	return 0;
+}
+
+static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
+				    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {};
+	int err;
+
+	if (res) {
+		err = nla_parse_nested(tb,
+				       ARRAY_SIZE(rtm_nh_res_policy_new) - 1,
+				       res, rtm_nh_res_policy_new, extack);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NHA_RES_GROUP_BUCKETS]) {
+		cfg->nh_grp_res_num_buckets =
+			nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]);
+		cfg->nh_grp_res_has_num_buckets = true;
+		if (!cfg->nh_grp_res_num_buckets) {
+			NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0");
+			return -EINVAL;
+		}
+	}
+
+	err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER],
+			       NH_RES_DEFAULT_IDLE_TIMER,
+			       &cfg->nh_grp_res_idle_timer,
+			       &cfg->nh_grp_res_has_idle_timer,
+			       extack);
+	if (err)
+		return err;
+
+	return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER],
+				NH_RES_DEFAULT_UNBALANCED_TIMER,
+				&cfg->nh_grp_res_unbalanced_timer,
+				&cfg->nh_grp_res_has_unbalanced_timer,
+				extack);
+}
+
 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 			    struct nlmsghdr *nlh, struct nh_config *cfg,
 			    struct netlink_ext_ack *extack)
@@ -1758,7 +2789,14 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 			NL_SET_ERR_MSG(extack, "Invalid group type");
 			goto out;
 		}
-		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
+		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
+					  cfg->nh_grp_type, extack);
+		if (err)
+			goto out;
+
+		if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+			err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
+						       cfg, extack);
 
 		/* no other attributes should be set */
 		goto out;
@@ -1983,10 +3021,12 @@ errout_free:
 }
 
 struct nh_dump_filter {
+	u32 nh_id;
 	int dev_idx;
 	int master_idx;
 	bool group_filter;
 	bool fdb_filter;
+	u32 res_bucket_nh_id;
 };
 
 static bool nh_dump_filtered(struct nexthop *nh,
@@ -2166,6 +3206,318 @@ out_err:
 	return err;
 }
 
+static struct nexthop *
+nexthop_find_group_resilient(struct net *net, u32 id,
+			     struct netlink_ext_ack *extack)
+{
+	struct nh_group *nhg;
+	struct nexthop *nh;
+
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		return ERR_PTR(-ENOENT);
+
+	if (!nh->is_group) {
+		NL_SET_ERR_MSG(extack, "Not a nexthop group");
+		return ERR_PTR(-EINVAL);
+	}
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient) {
+		NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return nh;
+}
+
+static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p,
+			      struct netlink_ext_ack *extack)
+{
+	u32 idx;
+
+	if (attr) {
+		idx = nla_get_u32(attr);
+		if (!idx) {
+			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+			return -EINVAL;
+		}
+		*nh_id_p = idx;
+	} else {
+		*nh_id_p = 0;
+	}
+
+	return 0;
+}
+
+static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
+				    struct nh_dump_filter *filter,
+				    struct netlink_callback *cb)
+{
+	struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+			  ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
+			  rtm_nh_policy_dump_bucket, NULL);
+	if (err < 0)
+		return err;
+
+	err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack);
+	if (err)
+		return err;
+
+	if (tb[NHA_RES_BUCKET]) {
+		size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1;
+
+		err = nla_parse_nested(res_tb, max,
+				       tb[NHA_RES_BUCKET],
+				       rtm_nh_res_bucket_policy_dump,
+				       cb->extack);
+		if (err < 0)
+			return err;
+
+		err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID],
+					 &filter->res_bucket_nh_id,
+					 cb->extack);
+		if (err)
+			return err;
+	}
+
+	return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
+}
+
+struct rtm_dump_res_bucket_ctx {
+	struct rtm_dump_nh_ctx nh;
+	u16 bucket_index;
+	u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
+};
+
+static struct rtm_dump_res_bucket_ctx *
+rtm_dump_res_bucket_ctx(struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx;
+
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+	return ctx;
+}
+
+struct rtm_dump_nexthop_bucket_data {
+	struct rtm_dump_res_bucket_ctx *ctx;
+	struct nh_dump_filter filter;
+};
+
+static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh,
+				      struct rtm_dump_nexthop_bucket_data *dd)
+{
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct nhmsg *nhm = nlmsg_data(cb->nlh);
+	struct nh_res_table *res_table;
+	struct nh_group *nhg;
+	u16 bucket_index;
+	int err;
+
+	if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	res_table = rtnl_dereference(nhg->res_table);
+	for (bucket_index = dd->ctx->bucket_index;
+	     bucket_index < res_table->num_nh_buckets;
+	     bucket_index++) {
+		struct nh_res_bucket *bucket;
+		struct nh_grp_entry *nhge;
+
+		bucket = &res_table->nh_buckets[bucket_index];
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family))
+			continue;
+
+		if (dd->filter.res_bucket_nh_id &&
+		    dd->filter.res_bucket_nh_id != nhge->nh->id)
+			continue;
+
+		err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+					 RTM_NEWNEXTHOPBUCKET, portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 cb->extack);
+		if (err < 0) {
+			if (likely(skb->len))
+				goto out;
+			goto out_err;
+		}
+	}
+
+	dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+	bucket_index = 0;
+
+out:
+	err = skb->len;
+out_err:
+	dd->ctx->bucket_index = bucket_index;
+	return err;
+}
+
+static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh, void *data)
+{
+	struct rtm_dump_nexthop_bucket_data *dd = data;
+	struct nh_group *nhg;
+
+	if (!nh->is_group)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient)
+		return 0;
+
+	return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd);
+}
+
+/* rtnl */
+static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb);
+	struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx };
+	struct net *net = sock_net(skb->sk);
+	struct nexthop *nh;
+	int err;
+
+	err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb);
+	if (err)
+		return err;
+
+	if (dd.filter.nh_id) {
+		nh = nexthop_find_group_resilient(net, dd.filter.nh_id,
+						  cb->extack);
+		if (IS_ERR(nh))
+			return PTR_ERR(nh);
+		err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd);
+	} else {
+		struct rb_root *root = &net->nexthop.rb_root;
+
+		err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh,
+					     &rtm_dump_nexthop_bucket_cb, &dd);
+	}
+
+	if (err < 0) {
+		if (likely(skb->len))
+			goto out;
+		goto out_err;
+	}
+
+out:
+	err = skb->len;
+out_err:
+	cb->seq = net->nexthop.seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+	return err;
+}
+
+static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res,
+					      u16 *bucket_index,
+					      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)];
+	int err;
+
+	err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1,
+			       res, rtm_nh_res_bucket_policy_get, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[NHA_RES_BUCKET_INDEX]) {
+		NL_SET_ERR_MSG(extack, "Bucket index is missing");
+		return -EINVAL;
+	}
+
+	*bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]);
+	return 0;
+}
+
+static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
+				   u32 *id, u16 *bucket_index,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+			  ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1,
+			  rtm_nh_policy_get_bucket, extack);
+	if (err < 0)
+		return err;
+
+	err = __nh_valid_get_del_req(nlh, tb, id, extack);
+	if (err)
+		return err;
+
+	if (!tb[NHA_RES_BUCKET]) {
+		NL_SET_ERR_MSG(extack, "Bucket information is missing");
+		return -EINVAL;
+	}
+
+	err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET],
+						 bucket_index, extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nh_res_table *res_table;
+	struct sk_buff *skb = NULL;
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	u16 bucket_index;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack);
+	if (err)
+		return err;
+
+	nh = nexthop_find_group_resilient(net, id, extack);
+	if (IS_ERR(nh))
+		return PTR_ERR(nh);
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	res_table = rtnl_dereference(nhg->res_table);
+	if (bucket_index >= res_table->num_nh_buckets) {
+		NL_SET_ERR_MSG(extack, "Bucket index out of bounds");
+		return -ENOENT;
+	}
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index],
+				 bucket_index, RTM_NEWNEXTHOPBUCKET,
+				 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+				 0, extack);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		goto errout_free;
+	}
+
+	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout_free:
+	kfree_skb(skb);
+	return err;
+}
+
 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
@@ -2277,6 +3629,75 @@ out:
 }
 EXPORT_SYMBOL(nexthop_set_hw_flags);
 
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+				 bool offload, bool trap)
+{
+	struct nh_res_table *res_table;
+	struct nh_res_bucket *bucket;
+	struct nexthop *nexthop;
+	struct nh_group *nhg;
+
+	rcu_read_lock();
+
+	nexthop = nexthop_find_by_id(net, id);
+	if (!nexthop || !nexthop->is_group)
+		goto out;
+
+	nhg = rcu_dereference(nexthop->nh_grp);
+	if (!nhg->resilient)
+		goto out;
+
+	if (bucket_index >= nhg->res_table->num_nh_buckets)
+		goto out;
+
+	res_table = rcu_dereference(nhg->res_table);
+	bucket = &res_table->nh_buckets[bucket_index];
+	bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+	if (offload)
+		bucket->nh_flags |= RTNH_F_OFFLOAD;
+	if (trap)
+		bucket->nh_flags |= RTNH_F_TRAP;
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_bucket_set_hw_flags);
+
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+				     unsigned long *activity)
+{
+	struct nh_res_table *res_table;
+	struct nexthop *nexthop;
+	struct nh_group *nhg;
+	u16 i;
+
+	rcu_read_lock();
+
+	nexthop = nexthop_find_by_id(net, id);
+	if (!nexthop || !nexthop->is_group)
+		goto out;
+
+	nhg = rcu_dereference(nexthop->nh_grp);
+	if (!nhg->resilient)
+		goto out;
+
+	/* Instead of silently ignoring some buckets, demand that the sizes
+	 * be the same.
+	 */
+	res_table = rcu_dereference(nhg->res_table);
+	if (num_buckets != res_table->num_nh_buckets)
+		goto out;
+
+	for (i = 0; i < num_buckets; i++) {
+		if (test_bit(i, activity))
+			nh_res_bucket_set_busy(&res_table->nh_buckets[i]);
+	}
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_res_grp_activity_update);
+
 static void __net_exit nexthop_net_exit(struct net *net)
 {
 	rtnl_lock();
@@ -2320,6 +3741,9 @@ static int __init nexthop_init(void)
 	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
 	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
 
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
+		      rtm_dump_nexthop_bucket, 0);
+
 	return 0;
 }
 subsys_initcall(nexthop_init);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bba150fdd265..f6787c55f6ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -21,7 +21,7 @@
  *		Alan Cox	:	Added BSD route gw semantics
  *		Alan Cox	:	Super /proc >4K
  *		Alan Cox	:	MTU in route table
- *		Alan Cox	: 	MSS actually. Also added the window
+ *		Alan Cox	:	MSS actually. Also added the window
  *					clamper.
  *		Sam Lantinga	:	Fixed route matching in rt_del()
  *		Alan Cox	:	Routing cache support.
@@ -41,7 +41,7 @@
  *		Olaf Erb	:	irtt wasn't being copied right.
  *		Bjorn Ekwall	:	Kerneld route support.
  *		Alan Cox	:	Multicast fixed (I hope)
- * 		Pavel Krauz	:	Limited broadcast fixed
+ *		Pavel Krauz	:	Limited broadcast fixed
  *		Mike McLagan	:	Routing by source
  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
  *					route.c and rewritten from scratch.
@@ -54,8 +54,8 @@
  *	Robert Olsson		:	Added rt_cache statistics
  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
- * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
- * 	Ilia Sotnikov		:	Removed TOS from hash calculations
+ *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
+ *	Ilia Sotnikov		:	Removed TOS from hash calculations
  */
 
 #define pr_fmt(fmt) "IPv4: " fmt
@@ -66,6 +66,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -234,19 +235,6 @@ static const struct seq_operations rt_cache_seq_ops = {
 	.show   = rt_cache_seq_show,
 };
 
-static int rt_cache_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rt_cache_seq_ops);
-}
-
-static const struct proc_ops rt_cache_proc_ops = {
-	.proc_open	= rt_cache_seq_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-};
-
-
 static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	int cpu;
@@ -324,19 +312,6 @@ static const struct seq_operations rt_cpu_seq_ops = {
 	.show   = rt_cpu_seq_show,
 };
 
-
-static int rt_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rt_cpu_seq_ops);
-}
-
-static const struct proc_ops rt_cpu_proc_ops = {
-	.proc_open	= rt_cpu_seq_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-};
-
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
@@ -367,13 +342,13 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_create("rt_cache", 0444, net->proc_net,
-			  &rt_cache_proc_ops);
+	pde = proc_create_seq("rt_cache", 0444, net->proc_net,
+			      &rt_cache_seq_ops);
 	if (!pde)
 		goto err1;
 
-	pde = proc_create("rt_cache", 0444,
-			  net->proc_net_stat, &rt_cpu_proc_ops);
+	pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat,
+			      &rt_cpu_seq_ops);
 	if (!pde)
 		goto err2;
 
@@ -478,8 +453,10 @@ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
 }
 
-#define IP_IDENTS_SZ 2048u
-
+/* Hash tables of size 2048..262144 depending on RAM size.
+ * Each bucket uses 8 bytes.
+ */
+static u32 ip_idents_mask __read_mostly;
 static atomic_t *ip_idents __read_mostly;
 static u32 *ip_tstamps __read_mostly;
 
@@ -489,12 +466,16 @@ static u32 *ip_tstamps __read_mostly;
  */
 u32 ip_idents_reserve(u32 hash, int segs)
 {
-	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
-	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
-	u32 old = READ_ONCE(*p_tstamp);
-	u32 now = (u32)jiffies;
+	u32 bucket, old, now = (u32)jiffies;
+	atomic_t *p_id;
+	u32 *p_tstamp;
 	u32 delta = 0;
 
+	bucket = hash & ip_idents_mask;
+	p_tstamp = ip_tstamps + bucket;
+	p_id = ip_idents + bucket;
+	old = READ_ONCE(*p_tstamp);
+
 	if (old != now && cmpxchg(p_tstamp, old, now) == old)
 		delta = prandom_u32_max(now - old);
 
@@ -722,6 +703,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
 
 		for_each_possible_cpu(i) {
 			struct rtable __rcu **prt;
+
 			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
 			rt = rcu_dereference(*prt);
 			if (rt)
@@ -1258,12 +1240,12 @@ static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 
 /*
-   We do not cache source address of outgoing interface,
-   because it is used only by IP RR, TS and SRR options,
-   so that it out of fast path.
-
-   BTW remember: "addr" is allowed to be not aligned
-   in IP options!
+ * We do not cache source address of outgoing interface,
+ * because it is used only by IP RR, TS and SRR options,
+ * so that it out of fast path.
+ *
+ * BTW remember: "addr" is allowed to be not aligned
+ * in IP options!
  */
 
 void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
@@ -2108,7 +2090,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto out;
 
 	/* Check for the most weird martians, which can be not detected
-	   by fib_lookup.
+	 * by fib_lookup.
 	 */
 
 	tun_info = skb_tunnel_info(skb);
@@ -2246,7 +2228,7 @@ local_input:
 	if (res->type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
-		rth->rt_flags 	&= ~RTCF_LOCAL;
+		rth->rt_flags	&= ~RTCF_LOCAL;
 	}
 
 	if (do_cache) {
@@ -2317,15 +2299,15 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		       u8 tos, struct net_device *dev, struct fib_result *res)
 {
 	/* Multicast recognition logic is moved from route cache to here.
-	   The problem was that too many Ethernet cards have broken/missing
-	   hardware multicast filters :-( As result the host on multicasting
-	   network acquires a lot of useless route cache entries, sort of
-	   SDR messages from all the world. Now we try to get rid of them.
-	   Really, provided software IP multicast filter is organized
-	   reasonably (at least, hashed), it does not result in a slowdown
-	   comparing with route cache reject entries.
-	   Note, that multicast routers are not affected, because
-	   route cache entry is created eventually.
+	 * The problem was that too many Ethernet cards have broken/missing
+	 * hardware multicast filters :-( As result the host on multicasting
+	 * network acquires a lot of useless route cache entries, sort of
+	 * SDR messages from all the world. Now we try to get rid of them.
+	 * Really, provided software IP multicast filter is organized
+	 * reasonably (at least, hashed), it does not result in a slowdown
+	 * comparing with route cache reject entries.
+	 * Note, that multicast routers are not affected, because
+	 * route cache entry is created eventually.
 	 */
 	if (ipv4_is_multicast(daddr)) {
 		struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -2537,11 +2519,11 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		rth = ERR_PTR(-ENETUNREACH);
 
 		/* I removed check for oif == dev_out->oif here.
-		   It was wrong for two reasons:
-		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
-		      is assigned to multiple interfaces.
-		   2. Moreover, we are allowed to send packets with saddr
-		      of another iface. --ANK
+		 * It was wrong for two reasons:
+		 * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
+		 *    is assigned to multiple interfaces.
+		 * 2. Moreover, we are allowed to send packets with saddr
+		 *    of another iface. --ANK
 		 */
 
 		if (fl4->flowi4_oif == 0 &&
@@ -2553,18 +2535,18 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 				goto out;
 
 			/* Special hack: user can direct multicasts
-			   and limited broadcast via necessary interface
-			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
-			   This hack is not just for fun, it allows
-			   vic,vat and friends to work.
-			   They bind socket to loopback, set ttl to zero
-			   and expect that it will work.
-			   From the viewpoint of routing cache they are broken,
-			   because we are not allowed to build multicast path
-			   with loopback source addr (look, routing cache
-			   cannot know, that ttl is zero, so that packet
-			   will not leave this host and route is valid).
-			   Luckily, this hack is good workaround.
+			 * and limited broadcast via necessary interface
+			 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
+			 * This hack is not just for fun, it allows
+			 * vic,vat and friends to work.
+			 * They bind socket to loopback, set ttl to zero
+			 * and expect that it will work.
+			 * From the viewpoint of routing cache they are broken,
+			 * because we are not allowed to build multicast path
+			 * with loopback source addr (look, routing cache
+			 * cannot know, that ttl is zero, so that packet
+			 * will not leave this host and route is valid).
+			 * Luckily, this hack is good workaround.
 			 */
 
 			fl4->flowi4_oif = dev_out->ifindex;
@@ -2627,21 +2609,21 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		    (ipv4_is_multicast(fl4->daddr) ||
 		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
 			/* Apparently, routing tables are wrong. Assume,
-			   that the destination is on link.
-
-			   WHY? DW.
-			   Because we are allowed to send to iface
-			   even if it has NO routes and NO assigned
-			   addresses. When oif is specified, routing
-			   tables are looked up with only one purpose:
-			   to catch if destination is gatewayed, rather than
-			   direct. Moreover, if MSG_DONTROUTE is set,
-			   we send packet, ignoring both routing tables
-			   and ifaddr state. --ANK
-
-
-			   We could make it even if oif is unknown,
-			   likely IPv6, but we do not.
+			 * that the destination is on link.
+			 *
+			 * WHY? DW.
+			 * Because we are allowed to send to iface
+			 * even if it has NO routes and NO assigned
+			 * addresses. When oif is specified, routing
+			 * tables are looked up with only one purpose:
+			 * to catch if destination is gatewayed, rather than
+			 * direct. Moreover, if MSG_DONTROUTE is set,
+			 * we send packet, ignoring both routing tables
+			 * and ifaddr state. --ANK
+			 *
+			 *
+			 * We could make it even if oif is unknown,
+			 * likely IPv6, but we do not.
 			 */
 
 			if (fl4->saddr == 0)
@@ -3553,18 +3535,25 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
 
 int __init ip_rt_init(void)
 {
+	void *idents_hash;
 	int cpu;
 
-	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
-				  GFP_KERNEL);
-	if (!ip_idents)
-		panic("IP: failed to allocate ip_idents\n");
+	/* For modern hosts, this will use 2 MB of memory */
+	idents_hash = alloc_large_system_hash("IP idents",
+					      sizeof(*ip_idents) + sizeof(*ip_tstamps),
+					      0,
+					      16, /* one bucket per 64 KB */
+					      HASH_ZERO,
+					      NULL,
+					      &ip_idents_mask,
+					      2048,
+					      256*1024);
+
+	ip_idents = idents_hash;
 
-	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+	prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
 
-	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
-	if (!ip_tstamps)
-		panic("IP: failed to allocate ip_tstamps\n");
+	ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
 
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index bc7d2a586e18..17c322b875fd 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
 }
 EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 static bool tcp_bpf_stream_read(const struct sock *sk)
 {
 	struct sk_psock *psock;
@@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 	if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
 		newsk->sk_prot = sk->sk_prot_creator;
 }
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69a545db80d2..4cf4dd532d1c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2914,7 +2914,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 	/* D. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		WARN_ON(tp->retrans_out != 0);
+		WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
@@ -5994,11 +5994,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 			tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
 		else
 			tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
-		skb_rbtree_walk_from(data) {
-			if (__tcp_retransmit_skb(sk, data, 1))
-				break;
-		}
-		tcp_rearm_rto(sk);
+		skb_rbtree_walk_from(data)
+			 tcp_mark_skb_lost(sk, data);
+		tcp_xmit_retransmit_queue(sk);
 		NET_INC_STATS(sock_net(sk),
 				LINUX_MIB_TCPFASTOPENACTIVEFAIL);
 		return true;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fbf140a770d8..bde781f46b41 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2775,13 +2775,17 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
  * a packet is still in a qdisc or driver queue.
  * In this case, there is very little point doing a retransmit !
  */
-static bool skb_still_in_host_queue(const struct sock *sk,
+static bool skb_still_in_host_queue(struct sock *sk,
 				    const struct sk_buff *skb)
 {
 	if (unlikely(skb_fclone_busy(sk, skb))) {
-		NET_INC_STATS(sock_net(sk),
-			      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-		return true;
+		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+		smp_mb__after_atomic();
+		if (skb_fclone_busy(sk, skb)) {
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+			return true;
+		}
 	}
 	return false;
 }
@@ -3147,14 +3151,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if (icsk->icsk_mtup.probe_size)
 		icsk->icsk_mtup.probe_size = 0;
 
-	/* Do not sent more than we queued. 1/4 is reserved for possible
-	 * copying overhead: fragmentation, tunneling, mangling etc.
-	 */
-	if (refcount_read(&sk->sk_wmem_alloc) >
-	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
-		  sk->sk_sndbuf))
-		return -EAGAIN;
-
 	if (skb_still_in_host_queue(sk, skb))
 		return -EBUSY;
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6126f8bf94b3..56e479d158b7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -381,7 +381,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 
 looped_back:
 	if (hdr->segments_left == 0) {
-		if (hdr->nexthdr == NEXTHDR_IPV6) {
+		if (hdr->nexthdr == NEXTHDR_IPV6 || hdr->nexthdr == NEXTHDR_IPV4) {
 			int offset = (hdr->hdrlen + 1) << 3;
 
 			skb_postpull_rcsum(skb, skb_network_header(skb),
@@ -397,7 +397,8 @@ looped_back:
 			skb_reset_network_header(skb);
 			skb_reset_transport_header(skb);
 			skb->encapsulation = 0;
-
+			if (hdr->nexthdr == NEXTHDR_IPV4)
+				skb->protocol = htons(ETH_P_IP);
 			__skb_tunnel_rx(skb, skb->dev, net);
 
 			netif_rx(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1056b0229ffd..ebb7519bec2a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2360,7 +2360,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 
 			memset(&hash_keys, 0, sizeof(hash_keys));
 
-                        if (!flkeys) {
+			if (!flkeys) {
 				skb_flow_dissect_flow_keys(skb, &keys, flag);
 				flkeys = &keys;
 			}
@@ -2500,20 +2500,20 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
 					 struct flowi6 *fl6,
 					 int flags)
 {
-        struct dst_entry *dst;
-        struct rt6_info *rt6;
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
 
-        rcu_read_lock();
-        dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
-        rt6 = (struct rt6_info *)dst;
-        /* For dst cached in uncached_list, refcnt is already taken. */
-        if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
-                dst = &net->ipv6.ip6_null_entry->dst;
-                dst_hold(dst);
-        }
-        rcu_read_unlock();
+	rcu_read_lock();
+	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+	rt6 = (struct rt6_info *)dst;
+	/* For dst cached in uncached_list, refcnt is already taken. */
+	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
+	}
+	rcu_read_unlock();
 
-        return dst;
+	return dst;
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c2a0c78e84d4..8936f48570fc 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -119,12 +119,12 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
 	return (struct seg6_local_lwt *)lwt->data;
 }
 
-static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags)
 {
 	struct ipv6_sr_hdr *srh;
 	int len, srhoff = 0;
 
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0)
 		return NULL;
 
 	if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
@@ -152,13 +152,10 @@ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
 {
 	struct ipv6_sr_hdr *srh;
 
-	srh = get_srh(skb);
+	srh = get_srh(skb, IP6_FH_F_SKIP_RH);
 	if (!srh)
 		return NULL;
 
-	if (srh->segments_left == 0)
-		return NULL;
-
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	if (!seg6_hmac_validate_skb(skb))
 		return NULL;
@@ -172,7 +169,7 @@ static bool decap_and_validate(struct sk_buff *skb, int proto)
 	struct ipv6_sr_hdr *srh;
 	unsigned int off = 0;
 
-	srh = get_srh(skb);
+	srh = get_srh(skb, 0);
 	if (srh && srh->segments_left > 0)
 		return false;
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 203890e378cb..2ee20743cb41 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -802,7 +802,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 	u16 version;
 	int length;
 
-	/* UDP has verifed checksum */
+	/* UDP has verified checksum */
 
 	/* UDP always verifies the packet length. */
 	__skb_pull(skb, sizeof(struct udphdr));
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 0511bbe4af7b..1078e14f1acf 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -122,8 +122,8 @@ static struct lapb_cb *lapb_create_cb(void)
 
 	timer_setup(&lapb->t1timer, NULL, 0);
 	timer_setup(&lapb->t2timer, NULL, 0);
-	lapb->t1timer_stop = true;
-	lapb->t2timer_stop = true;
+	lapb->t1timer_running = false;
+	lapb->t2timer_running = false;
 
 	lapb->t1      = LAPB_DEFAULT_T1;
 	lapb->t2      = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 0230b272b7d1..5be68869064d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -40,7 +40,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb)
 	lapb->t1timer.function = lapb_t1timer_expiry;
 	lapb->t1timer.expires  = jiffies + lapb->t1;
 
-	lapb->t1timer_stop = false;
+	lapb->t1timer_running = true;
 	add_timer(&lapb->t1timer);
 }
 
@@ -51,25 +51,25 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
 	lapb->t2timer.function = lapb_t2timer_expiry;
 	lapb->t2timer.expires  = jiffies + lapb->t2;
 
-	lapb->t2timer_stop = false;
+	lapb->t2timer_running = true;
 	add_timer(&lapb->t2timer);
 }
 
 void lapb_stop_t1timer(struct lapb_cb *lapb)
 {
-	lapb->t1timer_stop = true;
+	lapb->t1timer_running = false;
 	del_timer(&lapb->t1timer);
 }
 
 void lapb_stop_t2timer(struct lapb_cb *lapb)
 {
-	lapb->t2timer_stop = true;
+	lapb->t2timer_running = false;
 	del_timer(&lapb->t2timer);
 }
 
 int lapb_t1timer_running(struct lapb_cb *lapb)
 {
-	return timer_pending(&lapb->t1timer);
+	return lapb->t1timer_running;
 }
 
 static void lapb_t2timer_expiry(struct timer_list *t)
@@ -79,13 +79,14 @@ static void lapb_t2timer_expiry(struct timer_list *t)
 	spin_lock_bh(&lapb->lock);
 	if (timer_pending(&lapb->t2timer)) /* A new timer has been set up */
 		goto out;
-	if (lapb->t2timer_stop) /* The timer has been stopped */
+	if (!lapb->t2timer_running) /* The timer has been stopped */
 		goto out;
 
 	if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
 		lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
 		lapb_timeout_response(lapb);
 	}
+	lapb->t2timer_running = false;
 
 out:
 	spin_unlock_bh(&lapb->lock);
@@ -98,7 +99,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 	spin_lock_bh(&lapb->lock);
 	if (timer_pending(&lapb->t1timer)) /* A new timer has been set up */
 		goto out;
-	if (lapb->t1timer_stop) /* The timer has been stopped */
+	if (!lapb->t1timer_running) /* The timer has been stopped */
 		goto out;
 
 	switch (lapb->state) {
@@ -127,6 +128,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S1 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -151,6 +153,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_confirmation(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S2 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -169,6 +172,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb_stop_t2timer(lapb);
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S3 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -186,6 +190,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S4 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 89a4225ed321..2b7eec93c9f5 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 	int expected_opsize;
 	u8 version;
 	u8 flags;
+	u8 i;
 
 	switch (subtype) {
 	case MPTCPOPT_MP_CAPABLE:
@@ -272,14 +273,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		break;
 
 	case MPTCPOPT_RM_ADDR:
-		if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
+		if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
+		    opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
 			break;
 
 		ptr++;
 
 		mp_opt->rm_addr = 1;
-		mp_opt->rm_id = *ptr++;
-		pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
+		mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
+		for (i = 0; i < mp_opt->rm_list.nr; i++)
+			mp_opt->rm_list.ids[i] = *ptr++;
+		pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
 		break;
 
 	case MPTCPOPT_MP_PRIO:
@@ -676,20 +680,25 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-	u8 rm_id;
+	struct mptcp_rm_list rm_list;
+	int i, len;
 
 	if (!mptcp_pm_should_rm_signal(msk) ||
-	    !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id)))
+	    !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list)))
 		return false;
 
-	if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+	len = mptcp_rm_addr_len(&rm_list);
+	if (len < 0)
+		return false;
+	if (remaining < len)
 		return false;
 
-	*size = TCPOLEN_MPTCP_RM_ADDR_BASE;
+	*size = len;
 	opts->suboptions |= OPTION_MPTCP_RM_ADDR;
-	opts->rm_id = rm_id;
+	opts->rm_list = rm_list;
 
-	pr_debug("rm_id=%d", opts->rm_id);
+	for (i = 0; i < opts->rm_list.nr; i++)
+		pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
 
 	return true;
 }
@@ -1042,7 +1051,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (mp_opt.rm_addr) {
-		mptcp_pm_rm_addr_received(msk, mp_opt.rm_id);
+		mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
 		mp_opt.rm_addr = 0;
 	}
 
@@ -1221,9 +1230,23 @@ mp_capable_done:
 	}
 
 	if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
+		u8 i = 1;
+
 		*ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
-				      TCPOLEN_MPTCP_RM_ADDR_BASE,
-				      0, opts->rm_id);
+				      TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
+				      0, opts->rm_list.ids[0]);
+
+		while (i < opts->rm_list.nr) {
+			u8 id1, id2, id3, id4;
+
+			id1 = opts->rm_list.ids[i];
+			id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
+			id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
+			id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
+			put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
+			ptr += 1;
+			i += 4;
+		}
 	}
 
 	if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6fd4b2c1b076..4cfd80f90003 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -39,29 +39,29 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 	return 0;
 }
 
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
 {
 	u8 rm_addr = READ_ONCE(msk->pm.addr_signal);
 
-	pr_debug("msk=%p, local_id=%d", msk, local_id);
+	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
 
 	if (rm_addr) {
 		pr_warn("addr_signal error, rm_addr=%d", rm_addr);
 		return -EINVAL;
 	}
 
-	msk->pm.rm_id = local_id;
+	msk->pm.rm_list_tx = *rm_list;
 	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
 	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
 	return 0;
 }
 
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
 {
-	pr_debug("msk=%p, local_id=%d", msk, local_id);
+	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
 
 	spin_lock_bh(&msk->pm.lock);
-	mptcp_pm_nl_rm_subflow_received(msk, local_id);
+	mptcp_pm_nl_rm_subflow_received(msk, rm_list);
 	spin_unlock_bh(&msk->pm.lock);
 	return 0;
 }
@@ -205,17 +205,20 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
 	mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
 }
 
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+			       const struct mptcp_rm_list *rm_list)
 {
 	struct mptcp_pm_data *pm = &msk->pm;
+	u8 i;
 
-	pr_debug("msk=%p remote_id=%d", msk, rm_id);
+	pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);
 
-	mptcp_event_addr_removed(msk, rm_id);
+	for (i = 0; i < rm_list->nr; i++)
+		mptcp_event_addr_removed(msk, rm_list->ids[i]);
 
 	spin_lock_bh(&pm->lock);
 	mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
-	pm->rm_id = rm_id;
+	pm->rm_list_rx = *rm_list;
 	spin_unlock_bh(&pm->lock);
 }
 
@@ -258,9 +261,9 @@ out_unlock:
 }
 
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-			     u8 *rm_id)
+			     struct mptcp_rm_list *rm_list)
 {
-	int ret = false;
+	int ret = false, len;
 
 	spin_lock_bh(&msk->pm.lock);
 
@@ -268,10 +271,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 	if (!mptcp_pm_should_rm_signal(msk))
 		goto out_unlock;
 
-	if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+	len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
+	if (len < 0) {
+		WRITE_ONCE(msk->pm.addr_signal, 0);
+		goto out_unlock;
+	}
+	if (remaining < len)
 		goto out_unlock;
 
-	*rm_id = msk->pm.rm_id;
+	*rm_list = msk->pm.rm_list_tx;
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	ret = true;
 
@@ -291,7 +299,8 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
 	msk->pm.add_addr_accepted = 0;
 	msk->pm.local_addr_used = 0;
 	msk->pm.subflows = 0;
-	msk->pm.rm_id = 0;
+	msk->pm.rm_list_tx.nr = 0;
+	msk->pm.rm_list_rx.nr = 0;
 	WRITE_ONCE(msk->pm.work_pending, false);
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	WRITE_ONCE(msk->pm.accept_addr, false);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 8e8e35fa4002..5857b82c88bf 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -575,36 +575,40 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
+	u8 i;
 
-	pr_debug("address rm_id %d", msk->pm.rm_id);
+	pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr);
 
 	msk_owned_by_me(msk);
 
-	if (!msk->pm.rm_id)
+	if (!msk->pm.rm_list_rx.nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+	for (i = 0; i < msk->pm.rm_list_rx.nr; i++) {
+		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
 
-		if (msk->pm.rm_id != subflow->remote_id)
-			continue;
+			if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id)
+				continue;
 
-		spin_unlock_bh(&msk->pm.lock);
-		mptcp_subflow_shutdown(sk, ssk, how);
-		mptcp_close_ssk(sk, ssk, subflow);
-		spin_lock_bh(&msk->pm.lock);
+			pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]);
+			spin_unlock_bh(&msk->pm.lock);
+			mptcp_subflow_shutdown(sk, ssk, how);
+			mptcp_close_ssk(sk, ssk, subflow);
+			spin_lock_bh(&msk->pm.lock);
 
-		msk->pm.add_addr_accepted--;
-		msk->pm.subflows--;
-		WRITE_ONCE(msk->pm.accept_addr, true);
+			msk->pm.add_addr_accepted--;
+			msk->pm.subflows--;
+			WRITE_ONCE(msk->pm.accept_addr, true);
 
-		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
+			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
 
-		break;
+			break;
+		}
 	}
 }
 
@@ -641,39 +645,44 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 }
 
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
+	u8 i;
 
-	pr_debug("subflow rm_id %d", rm_id);
+	pr_debug("subflow rm_list_nr %d", rm_list->nr);
 
 	msk_owned_by_me(msk);
 
-	if (!rm_id)
+	if (!rm_list->nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+	for (i = 0; i < rm_list->nr; i++) {
+		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
 
-		if (rm_id != subflow->local_id)
-			continue;
+			if (rm_list->ids[i] != subflow->local_id)
+				continue;
 
-		spin_unlock_bh(&msk->pm.lock);
-		mptcp_subflow_shutdown(sk, ssk, how);
-		mptcp_close_ssk(sk, ssk, subflow);
-		spin_lock_bh(&msk->pm.lock);
+			pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]);
+			spin_unlock_bh(&msk->pm.lock);
+			mptcp_subflow_shutdown(sk, ssk, how);
+			mptcp_close_ssk(sk, ssk, subflow);
+			spin_lock_bh(&msk->pm.lock);
 
-		msk->pm.local_addr_used--;
-		msk->pm.subflows--;
+			msk->pm.local_addr_used--;
+			msk->pm.subflows--;
 
-		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
+			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
 
-		break;
+			break;
+		}
 	}
 }
 
@@ -1071,12 +1080,15 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
 				      struct mptcp_addr_info *addr,
 				      bool force)
 {
+	struct mptcp_rm_list list = { .nr = 0 };
 	bool ret;
 
+	list.ids[list.nr++] = addr->id;
+
 	ret = remove_anno_list_by_saddr(msk, addr);
 	if (ret || force) {
 		spin_lock_bh(&msk->pm.lock);
-		mptcp_pm_remove_addr(msk, addr->id);
+		mptcp_pm_remove_addr(msk, &list);
 		spin_unlock_bh(&msk->pm.lock);
 	}
 	return ret;
@@ -1087,9 +1099,12 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 {
 	struct mptcp_sock *msk;
 	long s_slot = 0, s_num = 0;
+	struct mptcp_rm_list list = { .nr = 0 };
 
 	pr_debug("remove_id=%d", addr->id);
 
+	list.ids[list.nr++] = addr->id;
+
 	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
 		struct sock *sk = (struct sock *)msk;
 		bool remove_subflow;
@@ -1103,7 +1118,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 		remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
 		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
 		if (remove_subflow)
-			mptcp_pm_remove_subflow(msk, addr->id);
+			mptcp_pm_remove_subflow(msk, &list);
 		release_sock(sk);
 
 next:
@@ -1185,14 +1200,61 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
-static void __flush_addrs(struct net *net, struct list_head *list)
+static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+					       struct list_head *rm_list)
+{
+	struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
+	struct mptcp_pm_addr_entry *entry;
+
+	list_for_each_entry(entry, rm_list, list) {
+		if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
+		    alist.nr < MPTCP_RM_IDS_MAX &&
+		    slist.nr < MPTCP_RM_IDS_MAX) {
+			alist.ids[alist.nr++] = entry->addr.id;
+			slist.ids[slist.nr++] = entry->addr.id;
+		} else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+			 alist.nr < MPTCP_RM_IDS_MAX) {
+			alist.ids[alist.nr++] = entry->addr.id;
+		}
+	}
+
+	if (alist.nr) {
+		spin_lock_bh(&msk->pm.lock);
+		mptcp_pm_remove_addr(msk, &alist);
+		spin_unlock_bh(&msk->pm.lock);
+	}
+	if (slist.nr)
+		mptcp_pm_remove_subflow(msk, &slist);
+}
+
+static void mptcp_nl_remove_addrs_list(struct net *net,
+				       struct list_head *rm_list)
+{
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
+
+	if (list_empty(rm_list))
+		return;
+
+	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+
+		lock_sock(sk);
+		mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+		release_sock(sk);
+
+		sock_put(sk);
+		cond_resched();
+	}
+}
+
+static void __flush_addrs(struct list_head *list)
 {
 	while (!list_empty(list)) {
 		struct mptcp_pm_addr_entry *cur;
 
 		cur = list_entry(list->next,
 				 struct mptcp_pm_addr_entry, list);
-		mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
 		list_del_rcu(&cur->list);
 		mptcp_pm_free_addr_entry(cur);
 	}
@@ -1217,7 +1279,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
 	pernet->next_id = 1;
 	bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
 	spin_unlock_bh(&pernet->lock);
-	__flush_addrs(sock_net(skb->sk), &free_list);
+	mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+	__flush_addrs(&free_list);
 	return 0;
 }
 
@@ -1814,7 +1877,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
 		/* net is removed from namespace list, can't race with
 		 * other modifiers
 		 */
-		__flush_addrs(net, &pernet->local_addr_list);
+		__flush_addrs(&pernet->local_addr_list);
 	}
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e21a5bc36cf0..1111a99b024f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -61,7 +61,7 @@
 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
 #define TCPOLEN_MPTCP_PORT_LEN		2
 #define TCPOLEN_MPTCP_PORT_ALIGN	2
-#define TCPOLEN_MPTCP_RM_ADDR_BASE	4
+#define TCPOLEN_MPTCP_RM_ADDR_BASE	3
 #define TCPOLEN_MPTCP_PRIO		3
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
@@ -142,7 +142,7 @@ struct mptcp_options_received {
 		mpc_map:1,
 		__unused:2;
 	u8	addr_id;
-	u8	rm_id;
+	struct mptcp_rm_list rm_list;
 	union {
 		struct in_addr	addr;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -207,7 +207,8 @@ struct mptcp_pm_data {
 	u8		local_addr_used;
 	u8		subflows;
 	u8		status;
-	u8		rm_id;
+	struct mptcp_rm_list rm_list_tx;
+	struct mptcp_rm_list rm_list_rx;
 };
 
 struct mptcp_data_frag {
@@ -647,7 +648,8 @@ void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
 void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 				const struct mptcp_addr_info *addr);
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+			       const struct mptcp_rm_list *rm_list);
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 				 struct mptcp_addr_info *addr,
@@ -661,8 +663,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
 			   bool echo, bool port);
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 
 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
 		 const struct sock *ssk, gfp_t gfp);
@@ -709,16 +711,25 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
 	return len;
 }
 
+static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
+{
+	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
+		return -EINVAL;
+
+	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
+}
+
 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 			      struct mptcp_addr_info *saddr, bool *echo, bool *port);
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-			     u8 *rm_id);
+			     struct mptcp_rm_list *rm_list);
 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
 void __init mptcp_pm_nl_init(void);
 void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
 void mptcp_pm_nl_work(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index db7479db8512..4f33307fa3cf 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -397,6 +397,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 			msg = "not picking up existing connection ";
 			goto out_invalid;
 		}
+		break;
 	case CT_DCCP_REQUEST:
 		break;
 	case CT_DCCP_INVALID:
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index c77ba8690ed8..1bce1d2805c4 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -79,11 +79,8 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 				   enum flow_offload_tuple_dir dir)
 {
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
-	struct dst_entry *other_dst = route->tuple[!dir].dst;
 	struct dst_entry *dst = route->tuple[dir].dst;
-
-	if (!dst_hold_safe(route->tuple[dir].dst))
-		return -1;
+	int i, j = 0;
 
 	switch (flow_tuple->l3proto) {
 	case NFPROTO_IPV4:
@@ -94,12 +91,46 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 		break;
 	}
 
-	flow_tuple->iifidx = other_dst->dev->ifindex;
-	flow_tuple->dst_cache = dst;
+	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+		if (route->tuple[dir].in.ingress_vlans & BIT(i))
+			flow_tuple->in_vlan_ingress |= BIT(j);
+		j++;
+	}
+	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+
+	switch (route->tuple[dir].xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+		       ETH_ALEN);
+		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+		       ETH_ALEN);
+		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+		flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+		break;
+	case FLOW_OFFLOAD_XMIT_XFRM:
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		if (!dst_hold_safe(route->tuple[dir].dst))
+			return -1;
+
+		flow_tuple->dst_cache = dst;
+		break;
+	}
+	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
 
 	return 0;
 }
 
+static void nft_flow_dst_release(struct flow_offload *flow,
+				 enum flow_offload_tuple_dir dir)
+{
+	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+		dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
 int flow_offload_route_init(struct flow_offload *flow,
 			    const struct nf_flow_route *route)
 {
@@ -118,7 +149,7 @@ int flow_offload_route_init(struct flow_offload *flow,
 	return 0;
 
 err_route_reply:
-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 
 	return err;
 }
@@ -169,8 +200,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
 
 static void flow_offload_route_release(struct flow_offload *flow)
 {
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
 }
 
 void flow_offload_free(struct flow_offload *flow)
@@ -389,29 +420,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
 
-
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
-				__be16 port, __be16 new_port)
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+				 __be16 port, __be16 new_port)
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
-
-	return 0;
 }
 
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
-				__be16 port, __be16 new_port)
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+				 __be16 port, __be16 new_port)
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace2(&udph->check, skb, port,
@@ -419,37 +441,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
-			    u8 protocol, __be16 port, __be16 new_port)
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+			     u8 protocol, __be16 port, __be16 new_port)
 {
 	switch (protocol) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
-			return NF_DROP;
+		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
-			return NF_DROP;
+		nf_flow_nat_port_udp(skb, thoff, port, new_port);
 		break;
 	}
-
-	return 0;
 }
 
-int nf_flow_snat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_snat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir)
 {
 	struct flow_ports *hdr;
 	__be16 port, new_port;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
-		return -1;
-
 	hdr = (void *)(skb_network_header(skb) + thoff);
 
 	switch (dir) {
@@ -463,24 +476,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
 		hdr->dest = new_port;
 		break;
-	default:
-		return -1;
 	}
 
-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 }
 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
 
-int nf_flow_dnat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
+		       unsigned int thoff, u8 protocol,
+		       enum flow_offload_tuple_dir dir)
 {
 	struct flow_ports *hdr;
 	__be16 port, new_port;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
-		return -1;
-
 	hdr = (void *)(skb_network_header(skb) + thoff);
 
 	switch (dir) {
@@ -494,11 +502,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
 		hdr->source = new_port;
 		break;
-	default:
-		return -1;
 	}
 
-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 }
 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index a698dbe28ef5..12cb0cc6958c 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -7,6 +7,9 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -34,28 +37,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
 	return 0;
 }
 
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+			       __be32 addr, __be32 new_addr)
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
-	return 0;
 }
 
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+			       __be32 addr, __be32 new_addr)
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace4(&udph->check, skb, addr,
@@ -63,31 +58,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
-				  unsigned int thoff, __be32 addr,
-				  __be32 new_addr)
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+				   unsigned int thoff, __be32 addr,
+				   __be32 new_addr)
 {
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
 		break;
 	}
-
-	return 0;
 }
 
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ip(const struct flow_offload *flow,
+			    struct sk_buff *skb, struct iphdr *iph,
+			    unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	__be32 addr, new_addr;
 
@@ -102,17 +91,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
 		iph->daddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 }
 
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
+			    struct sk_buff *skb, struct iphdr *iph,
+			    unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	__be32 addr, new_addr;
 
@@ -127,31 +114,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
 		iph->saddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 }
 
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			  unsigned int thoff, enum flow_offload_tuple_dir dir)
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+			  unsigned int thoff, enum flow_offload_tuple_dir dir,
+			  struct iphdr *iph)
 {
-	struct iphdr *iph = ip_hdr(skb);
-
-	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
-	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
-		return -1;
-
-	iph = ip_hdr(skb);
-	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
-	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
-		return -1;
-
-	return 0;
+	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
+		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
+	}
+	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
+		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
+	}
 }
 
 static bool ip_has_options(unsigned int thoff)
@@ -159,29 +139,58 @@ static bool ip_has_options(unsigned int thoff)
 	return thoff != sizeof(struct iphdr);
 }
 
+static void nf_flow_tuple_encap(struct sk_buff *skb,
+				struct flow_offload_tuple *tuple)
+{
+	struct vlan_ethhdr *veth;
+	struct pppoe_hdr *phdr;
+	int i = 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		tuple->encap[i].id = skb_vlan_tag_get(skb);
+		tuple->encap[i].proto = skb->vlan_proto;
+		i++;
+	}
+	switch (skb->protocol) {
+	case htons(ETH_P_8021Q):
+		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+		tuple->encap[i].proto = skb->protocol;
+		break;
+	case htons(ETH_P_PPP_SES):
+		phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+		tuple->encap[i].id = ntohs(phdr->sid);
+		tuple->encap[i].proto = skb->protocol;
+		break;
+	}
+}
+
 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-			    struct flow_offload_tuple *tuple)
+			    struct flow_offload_tuple *tuple, u32 *hdrsize,
+			    u32 offset)
 {
-	unsigned int thoff, hdrsize;
 	struct flow_ports *ports;
+	unsigned int thoff;
 	struct iphdr *iph;
 
-	if (!pskb_may_pull(skb, sizeof(*iph)))
+	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
 		return -1;
 
-	iph = ip_hdr(skb);
-	thoff = iph->ihl * 4;
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
+	thoff = (iph->ihl * 4);
 
 	if (ip_is_fragment(iph) ||
 	    unlikely(ip_has_options(thoff)))
 		return -1;
 
+	thoff += offset;
+
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		hdrsize = sizeof(struct tcphdr);
+		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
-		hdrsize = sizeof(struct udphdr);
+		*hdrsize = sizeof(struct udphdr);
 		break;
 	default:
 		return -1;
@@ -190,11 +199,10 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	if (iph->ttl <= 1)
 		return -1;
 
-	thoff = iph->ihl * 4;
-	if (!pskb_may_pull(skb, thoff + hdrsize))
+	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
-	iph = ip_hdr(skb);
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v4.s_addr	= iph->saddr;
@@ -204,6 +212,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	tuple->l3proto		= AF_INET;
 	tuple->l4proto		= iph->protocol;
 	tuple->iifidx		= dev->ifindex;
+	nf_flow_tuple_encap(skb, tuple);
 
 	return 0;
 }
@@ -220,14 +229,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
-{
-	if (unlikely(dst_xfrm(dst)))
-		return dst_check(dst, 0) ? 0 : -1;
-
-	return 0;
-}
-
 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 				      const struct nf_hook_state *state,
 				      struct dst_entry *dst)
@@ -238,6 +239,91 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+	__be16 proto;
+
+	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			     sizeof(struct pppoe_hdr)));
+	switch (proto) {
+	case htons(PPP_IP):
+		return htons(ETH_P_IP);
+	case htons(PPP_IPV6):
+		return htons(ETH_P_IPV6);
+	}
+
+	return 0;
+}
+
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+				       u32 *offset)
+{
+	struct vlan_ethhdr *veth;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_8021Q):
+		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+		if (veth->h_vlan_encapsulated_proto == proto) {
+			*offset += VLAN_HLEN;
+			return true;
+		}
+		break;
+	case htons(ETH_P_PPP_SES):
+		if (nf_flow_pppoe_proto(skb) == proto) {
+			*offset += PPPOE_SES_HLEN;
+			return true;
+		}
+		break;
+	}
+
+	return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+			      struct flow_offload_tuple_rhash *tuplehash)
+{
+	struct vlan_hdr *vlan_hdr;
+	int i;
+
+	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+		if (skb_vlan_tag_present(skb)) {
+			__vlan_hwaccel_clear_tag(skb);
+			continue;
+		}
+		switch (skb->protocol) {
+		case htons(ETH_P_8021Q):
+			vlan_hdr = (struct vlan_hdr *)skb->data;
+			__skb_pull(skb, VLAN_HLEN);
+			vlan_set_encap_proto(skb, vlan_hdr);
+			skb_reset_network_header(skb);
+			break;
+		case htons(ETH_P_PPP_SES):
+			skb->protocol = nf_flow_pppoe_proto(skb);
+			skb_pull(skb, PPPOE_SES_HLEN);
+			skb_reset_network_header(skb);
+			break;
+		}
+	}
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+				       const struct flow_offload_tuple_rhash *tuplehash,
+				       unsigned short type)
+{
+	struct net_device *outdev;
+
+	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+	if (!outdev)
+		return NF_DROP;
+
+	skb->dev = outdev;
+	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+			tuplehash->tuple.out.h_source, skb->len);
+	dev_queue_xmit(skb);
+
+	return NF_STOLEN;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -248,15 +334,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	enum flow_offload_tuple_dir dir;
 	struct flow_offload *flow;
 	struct net_device *outdev;
+	u32 hdrsize, offset = 0;
+	unsigned int thoff, mtu;
 	struct rtable *rt;
-	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
+	int ret;
 
-	if (skb->protocol != htons(ETH_P_IP))
+	if (skb->protocol != htons(ETH_P_IP) &&
+	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -265,77 +354,87 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-	outdev = rt->dst.dev;
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+	mtu = flow->tuplehash[dir].tuple.mtu + offset;
+	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return NF_ACCEPT;
 
-	if (skb_try_make_writable(skb, sizeof(*iph)))
-		return NF_DROP;
-
-	thoff = ip_hdr(skb)->ihl * 4;
-	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
+	thoff = (iph->ihl * 4) + offset;
+	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	flow_offload_refresh(flow_table, flow);
-
-	if (nf_flow_offload_dst_check(&rt->dst)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
-	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
+	flow_offload_refresh(flow_table, flow);
+
+	nf_flow_encap_pop(skb, tuplehash);
+	thoff -= offset;
+
 	iph = ip_hdr(skb);
+	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
+
 	ip_decrease_ttl(iph);
 	skb->tstamp = 0;
 
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	if (unlikely(dst_xfrm(&rt->dst))) {
+	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
 		IPCB(skb)->flags = IPSKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+				 struct in6_addr *addr,
+				 struct in6_addr *new_addr,
+				 struct ipv6hdr *ip6h)
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
 				  new_addr->s6_addr32, true);
-
-	return 0;
 }
 
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+				 struct in6_addr *addr,
+				 struct in6_addr *new_addr)
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
@@ -343,32 +442,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
-				    unsigned int thoff, struct in6_addr *addr,
-				    struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+				     unsigned int thoff, struct in6_addr *addr,
+				     struct in6_addr *new_addr)
 {
 	switch (ip6h->nexthdr) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
 		break;
 	}
-
-	return 0;
 }
 
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
+			      struct sk_buff *skb, struct ipv6hdr *ip6h,
+			      unsigned int thoff,
+			      enum flow_offload_tuple_dir dir)
 {
 	struct in6_addr addr, new_addr;
 
@@ -383,17 +476,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
 		ip6h->daddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 }
 
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
+			      struct sk_buff *skb, struct ipv6hdr *ip6h,
+			      unsigned int thoff,
+			      enum flow_offload_tuple_dir dir)
 {
 	struct in6_addr addr, new_addr;
 
@@ -408,52 +499,48 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
 		ip6h->saddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 }
 
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
-			    struct sk_buff *skb,
-			    enum flow_offload_tuple_dir dir)
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb,
+			     enum flow_offload_tuple_dir dir,
+			     struct ipv6hdr *ip6h)
 {
-	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	unsigned int thoff = sizeof(*ip6h);
 
-	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
-	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
-		return -1;
-
-	ip6h = ipv6_hdr(skb);
-	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
-	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
-		return -1;
-
-	return 0;
+	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
+	}
+	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
+	}
 }
 
 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-			      struct flow_offload_tuple *tuple)
+			      struct flow_offload_tuple *tuple, u32 *hdrsize,
+			      u32 offset)
 {
-	unsigned int thoff, hdrsize;
 	struct flow_ports *ports;
 	struct ipv6hdr *ip6h;
+	unsigned int thoff;
 
-	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+	thoff = sizeof(*ip6h) + offset;
+	if (!pskb_may_pull(skb, thoff))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 
 	switch (ip6h->nexthdr) {
 	case IPPROTO_TCP:
-		hdrsize = sizeof(struct tcphdr);
+		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
-		hdrsize = sizeof(struct udphdr);
+		*hdrsize = sizeof(struct udphdr);
 		break;
 	default:
 		return -1;
@@ -462,11 +549,10 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	if (ip6h->hop_limit <= 1)
 		return -1;
 
-	thoff = sizeof(*ip6h);
-	if (!pskb_may_pull(skb, thoff + hdrsize))
+	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v6		= ip6h->saddr;
@@ -476,6 +562,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	tuple->l3proto		= AF_INET6;
 	tuple->l4proto		= ip6h->nexthdr;
 	tuple->iifidx		= dev->ifindex;
+	nf_flow_tuple_encap(skb, tuple);
 
 	return 0;
 }
@@ -491,13 +578,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	const struct in6_addr *nexthop;
 	struct flow_offload *flow;
 	struct net_device *outdev;
+	unsigned int thoff, mtu;
+	u32 hdrsize, offset = 0;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
+	int ret;
 
-	if (skb->protocol != htons(ETH_P_IPV6))
+	if (skb->protocol != htons(ETH_P_IPV6) &&
+	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -506,48 +597,64 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-	outdev = rt->dst.dev;
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+	mtu = flow->tuplehash[dir].tuple.mtu + offset;
+	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return NF_ACCEPT;
 
-	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
-				sizeof(*ip6h)))
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+	thoff = sizeof(*ip6h) + offset;
+	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
 		return NF_ACCEPT;
 
-	flow_offload_refresh(flow_table, flow);
-
-	if (nf_flow_offload_dst_check(&rt->dst)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
-	if (skb_try_make_writable(skb, sizeof(*ip6h)))
+	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
-	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
-		return NF_DROP;
+	flow_offload_refresh(flow_table, flow);
+
+	nf_flow_encap_pop(skb, tuplehash);
 
 	ip6h = ipv6_hdr(skb);
+	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
+
 	ip6h->hop_limit--;
 	skb->tstamp = 0;
 
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	if (unlikely(dst_xfrm(&rt->dst))) {
+	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
 		IP6CB(skb)->flags = IP6SKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	skb->dev = outdev;
-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2a6993fa40d7..7d0d128407be 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -13,7 +13,9 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-static struct workqueue_struct *nf_flow_offload_wq;
+static struct workqueue_struct *nf_flow_offload_add_wq;
+static struct workqueue_struct *nf_flow_offload_del_wq;
+static struct workqueue_struct *nf_flow_offload_stats_wq;
 
 struct flow_offload_work {
 	struct list_head	list;
@@ -175,28 +177,45 @@ static int flow_offload_eth_src(struct net *net,
 				enum flow_offload_tuple_dir dir,
 				struct nf_flow_rule *flow_rule)
 {
-	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
 	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
 	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
-	struct net_device *dev;
+	const struct flow_offload_tuple *other_tuple, *this_tuple;
+	struct net_device *dev = NULL;
+	const unsigned char *addr;
 	u32 mask, val;
 	u16 val16;
 
-	dev = dev_get_by_index(net, tuple->iifidx);
-	if (!dev)
-		return -ENOENT;
+	this_tuple = &flow->tuplehash[dir].tuple;
+
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		addr = this_tuple->out.h_source;
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		dev = dev_get_by_index(net, other_tuple->iifidx);
+		if (!dev)
+			return -ENOENT;
+
+		addr = dev->dev_addr;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	mask = ~0xffff0000;
-	memcpy(&val16, dev->dev_addr, 2);
+	memcpy(&val16, addr, 2);
 	val = val16 << 16;
 	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 			    &val, &mask);
 
 	mask = ~0xffffffff;
-	memcpy(&val, dev->dev_addr + 2, 4);
+	memcpy(&val, addr + 2, 4);
 	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
 			    &val, &mask);
-	dev_put(dev);
+
+	if (dev)
+		dev_put(dev);
 
 	return 0;
 }
@@ -208,27 +227,40 @@ static int flow_offload_eth_dst(struct net *net,
 {
 	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
 	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
-	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
+	const struct flow_offload_tuple *other_tuple, *this_tuple;
 	const struct dst_entry *dst_cache;
 	unsigned char ha[ETH_ALEN];
 	struct neighbour *n;
+	const void *daddr;
 	u32 mask, val;
 	u8 nud_state;
 	u16 val16;
 
-	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
-	n = dst_neigh_lookup(dst_cache, daddr);
-	if (!n)
-		return -ENOENT;
+	this_tuple = &flow->tuplehash[dir].tuple;
 
-	read_lock_bh(&n->lock);
-	nud_state = n->nud_state;
-	ether_addr_copy(ha, n->ha);
-	read_unlock_bh(&n->lock);
-
-	if (!(nud_state & NUD_VALID)) {
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ether_addr_copy(ha, this_tuple->out.h_dest);
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		daddr = &other_tuple->src_v4;
+		dst_cache = this_tuple->dst_cache;
+		n = dst_neigh_lookup(dst_cache, daddr);
+		if (!n)
+			return -ENOENT;
+
+		read_lock_bh(&n->lock);
+		nud_state = n->nud_state;
+		ether_addr_copy(ha, n->ha);
+		read_unlock_bh(&n->lock);
 		neigh_release(n);
-		return -ENOENT;
+
+		if (!(nud_state & NUD_VALID))
+			return -ENOENT;
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
 	mask = ~0xffffffff;
@@ -241,7 +273,6 @@ static int flow_offload_eth_dst(struct net *net,
 	val = val16;
 	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 			    &val, &mask);
-	neigh_release(n);
 
 	return 0;
 }
@@ -463,27 +494,52 @@ static void flow_offload_ipv4_checksum(struct net *net,
 	}
 }
 
-static void flow_offload_redirect(const struct flow_offload *flow,
+static void flow_offload_redirect(struct net *net,
+				  const struct flow_offload *flow,
 				  enum flow_offload_tuple_dir dir,
 				  struct nf_flow_rule *flow_rule)
 {
-	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
-	struct rtable *rt;
+	const struct flow_offload_tuple *this_tuple, *other_tuple;
+	struct flow_action_entry *entry;
+	struct net_device *dev;
+	int ifindex;
+
+	this_tuple = &flow->tuplehash[dir].tuple;
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		this_tuple = &flow->tuplehash[dir].tuple;
+		ifindex = this_tuple->out.hw_ifidx;
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		ifindex = other_tuple->iifidx;
+		break;
+	default:
+		return;
+	}
 
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev)
+		return;
+
+	entry = flow_action_entry_next(flow_rule);
 	entry->id = FLOW_ACTION_REDIRECT;
-	entry->dev = rt->dst.dev;
-	dev_hold(rt->dst.dev);
+	entry->dev = dev;
 }
 
 static void flow_offload_encap_tunnel(const struct flow_offload *flow,
 				      enum flow_offload_tuple_dir dir,
 				      struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *this_tuple;
 	struct flow_action_entry *entry;
 	struct dst_entry *dst;
 
-	dst = flow->tuplehash[dir].tuple.dst_cache;
+	this_tuple = &flow->tuplehash[dir].tuple;
+	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return;
+
+	dst = this_tuple->dst_cache;
 	if (dst && dst->lwtstate) {
 		struct ip_tunnel_info *tun_info;
 
@@ -500,10 +556,15 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
 				      enum flow_offload_tuple_dir dir,
 				      struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *other_tuple;
 	struct flow_action_entry *entry;
 	struct dst_entry *dst;
 
-	dst = flow->tuplehash[!dir].tuple.dst_cache;
+	other_tuple = &flow->tuplehash[!dir].tuple;
+	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return;
+
+	dst = other_tuple->dst_cache;
 	if (dst && dst->lwtstate) {
 		struct ip_tunnel_info *tun_info;
 
@@ -515,10 +576,14 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
 	}
 }
 
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
-			    enum flow_offload_tuple_dir dir,
-			    struct nf_flow_rule *flow_rule)
+static int
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
+			  enum flow_offload_tuple_dir dir,
+			  struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *other_tuple;
+	int i;
+
 	flow_offload_decap_tunnel(flow, dir, flow_rule);
 	flow_offload_encap_tunnel(flow, dir, flow_rule);
 
@@ -526,6 +591,39 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
 		return -1;
 
+	other_tuple = &flow->tuplehash[!dir].tuple;
+
+	for (i = 0; i < other_tuple->encap_num; i++) {
+		struct flow_action_entry *entry;
+
+		if (other_tuple->in_vlan_ingress & BIT(i))
+			continue;
+
+		entry = flow_action_entry_next(flow_rule);
+
+		switch (other_tuple->encap[i].proto) {
+		case htons(ETH_P_PPP_SES):
+			entry->id = FLOW_ACTION_PPPOE_PUSH;
+			entry->pppoe.sid = other_tuple->encap[i].id;
+			break;
+		case htons(ETH_P_8021Q):
+			entry->id = FLOW_ACTION_VLAN_PUSH;
+			entry->vlan.vid = other_tuple->encap[i].id;
+			entry->vlan.proto = other_tuple->encap[i].proto;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+			    enum flow_offload_tuple_dir dir,
+			    struct nf_flow_rule *flow_rule)
+{
+	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+		return -1;
+
 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
 		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
 		flow_offload_port_snat(net, flow, dir, flow_rule);
@@ -538,7 +636,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 	    test_bit(NF_FLOW_DNAT, &flow->flags))
 		flow_offload_ipv4_checksum(net, flow, flow_rule);
 
-	flow_offload_redirect(flow, dir, flow_rule);
+	flow_offload_redirect(net, flow, dir, flow_rule);
 
 	return 0;
 }
@@ -548,11 +646,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 			    enum flow_offload_tuple_dir dir,
 			    struct nf_flow_rule *flow_rule)
 {
-	flow_offload_decap_tunnel(flow, dir, flow_rule);
-	flow_offload_encap_tunnel(flow, dir, flow_rule);
-
-	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
-	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
 		return -1;
 
 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
@@ -564,7 +658,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 		flow_offload_port_dnat(net, flow, dir, flow_rule);
 	}
 
-	flow_offload_redirect(flow, dir, flow_rule);
+	flow_offload_redirect(net, flow, dir, flow_rule);
 
 	return 0;
 }
@@ -578,10 +672,10 @@ nf_flow_offload_rule_alloc(struct net *net,
 			   enum flow_offload_tuple_dir dir)
 {
 	const struct nf_flowtable *flowtable = offload->flowtable;
+	const struct flow_offload_tuple *tuple, *other_tuple;
 	const struct flow_offload *flow = offload->flow;
-	const struct flow_offload_tuple *tuple;
+	struct dst_entry *other_dst = NULL;
 	struct nf_flow_rule *flow_rule;
-	struct dst_entry *other_dst;
 	int err = -ENOMEM;
 
 	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
@@ -597,7 +691,10 @@ nf_flow_offload_rule_alloc(struct net *net,
 	flow_rule->rule->match.key = &flow_rule->match.key;
 
 	tuple = &flow->tuplehash[dir].tuple;
-	other_dst = flow->tuplehash[!dir].tuple.dst_cache;
+	other_tuple = &flow->tuplehash[!dir].tuple;
+	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+		other_dst = other_tuple->dst_cache;
+
 	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
 	if (err < 0)
 		goto err_flow_match;
@@ -826,7 +923,12 @@ static void flow_offload_work_handler(struct work_struct *work)
 
 static void flow_offload_queue_work(struct flow_offload_work *offload)
 {
-	queue_work(nf_flow_offload_wq, &offload->work);
+	if (offload->cmd == FLOW_CLS_REPLACE)
+		queue_work(nf_flow_offload_add_wq, &offload->work);
+	else if (offload->cmd == FLOW_CLS_DESTROY)
+		queue_work(nf_flow_offload_del_wq, &offload->work);
+	else
+		queue_work(nf_flow_offload_stats_wq, &offload->work);
 }
 
 static struct flow_offload_work *
@@ -898,8 +1000,11 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
 
 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
 {
-	if (nf_flowtable_hw_offload(flowtable))
-		flush_workqueue(nf_flow_offload_wq);
+	if (nf_flowtable_hw_offload(flowtable)) {
+		flush_workqueue(nf_flow_offload_add_wq);
+		flush_workqueue(nf_flow_offload_del_wq);
+		flush_workqueue(nf_flow_offload_stats_wq);
+	}
 }
 
 static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
@@ -1011,15 +1116,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
 
 int nf_flow_table_offload_init(void)
 {
-	nf_flow_offload_wq  = alloc_workqueue("nf_flow_table_offload",
-					      WQ_UNBOUND, 0);
-	if (!nf_flow_offload_wq)
+	nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
+						  WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_add_wq)
 		return -ENOMEM;
 
+	nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
+						  WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_del_wq)
+		goto err_del_wq;
+
+	nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
+						    WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_stats_wq)
+		goto err_stats_wq;
+
 	return 0;
+
+err_stats_wq:
+	destroy_workqueue(nf_flow_offload_del_wq);
+err_del_wq:
+	destroy_workqueue(nf_flow_offload_add_wq);
+	return -ENOMEM;
 }
 
 void nf_flow_table_offload_exit(void)
 {
-	destroy_workqueue(nf_flow_offload_wq);
+	destroy_workqueue(nf_flow_offload_add_wq);
+	destroy_workqueue(nf_flow_offload_del_wq);
+	destroy_workqueue(nf_flow_offload_stats_wq);
 }
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f57f1a6ba96f..fc2526b8bd55 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -900,6 +900,12 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
 	nft_table_disable(net, table, 0);
 }
 
+enum {
+	NFT_TABLE_STATE_UNCHANGED	= 0,
+	NFT_TABLE_STATE_DORMANT,
+	NFT_TABLE_STATE_WAKEUP
+};
+
 static int nf_tables_updtable(struct nft_ctx *ctx)
 {
 	struct nft_trans *trans;
@@ -929,19 +935,17 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 
 	if ((flags & NFT_TABLE_F_DORMANT) &&
 	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
-		nft_trans_table_enable(trans) = false;
+		nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-		ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
 		ret = nf_tables_table_enable(ctx->net, ctx->table);
 		if (ret >= 0)
-			nft_trans_table_enable(trans) = true;
-		else
-			ctx->table->flags |= NFT_TABLE_F_DORMANT;
+			nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
 	}
 	if (ret < 0)
 		goto err;
 
+	nft_trans_table_flags(trans) = flags;
 	nft_trans_table_update(trans) = true;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -8086,11 +8090,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (!nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(net,
-								trans->ctx.table);
-					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
-				}
+				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+					nf_tables_table_disable(net, trans->ctx.table);
+
+				trans->ctx.table->flags = nft_trans_table_flags(trans);
 			} else {
 				nft_clear(net, trans->ctx.table);
 			}
@@ -8303,11 +8306,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(net,
-								trans->ctx.table);
-					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
-				}
+				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+					nf_tables_table_disable(net, trans->ctx.table);
+
 				nft_trans_destroy(trans);
 			} else {
 				list_del_rcu(&trans->ctx.table->list);
@@ -8577,6 +8578,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 							data->verdict.chain);
 				if (err < 0)
 					return err;
+				break;
 			default:
 				break;
 			}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 882fe8648653..0592a9456084 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -527,6 +527,7 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
 	case NFT_CT_ZONE:
 		if (--nft_ct_pcpu_template_refcnt == 0)
 			nft_ct_tmpl_put_pcpu();
+		break;
 #endif
 	default:
 		break;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 3a6c84fb2c90..4843dd2b410c 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -19,10 +19,205 @@ struct nft_flow_offload {
 	struct nft_flowtable	*flowtable;
 };
 
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
+{
+	if (dst_xfrm(dst))
+		return FLOW_OFFLOAD_XMIT_XFRM;
+
+	return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nft_default_forward_path(struct nf_flow_route *route,
+				     struct dst_entry *dst_cache,
+				     enum ip_conntrack_dir dir)
+{
+	route->tuple[!dir].in.ifindex	= dst_cache->dev->ifindex;
+	route->tuple[dir].dst		= dst_cache;
+	route->tuple[dir].xmit_type	= nft_xmit_type(dst_cache);
+}
+
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+				     const struct dst_entry *dst_cache,
+				     const struct nf_conn *ct,
+				     enum ip_conntrack_dir dir, u8 *ha,
+				     struct net_device_path_stack *stack)
+{
+	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+	struct net_device *dev = dst_cache->dev;
+	struct neighbour *n;
+	u8 nud_state;
+
+	n = dst_neigh_lookup(dst_cache, daddr);
+	if (!n)
+		return -1;
+
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	ether_addr_copy(ha, n->ha);
+	read_unlock_bh(&n->lock);
+	neigh_release(n);
+
+	if (!(nud_state & NUD_VALID))
+		return -1;
+
+	return dev_fill_forward_path(dev, ha, stack);
+}
+
+struct nft_forward_info {
+	const struct net_device *indev;
+	const struct net_device *outdev;
+	const struct net_device *hw_outdev;
+	struct id {
+		__u16	id;
+		__be16	proto;
+	} encap[NF_FLOW_TABLE_ENCAP_MAX];
+	u8 num_encaps;
+	u8 ingress_vlans;
+	u8 h_source[ETH_ALEN];
+	u8 h_dest[ETH_ALEN];
+	enum flow_offload_xmit_type xmit_type;
+};
+
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+		return false;
+
+	return true;
+}
+
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
+			      struct nft_forward_info *info,
+			      unsigned char *ha, struct nf_flowtable *flowtable)
+{
+	const struct net_device_path *path;
+	int i;
+
+	memcpy(info->h_dest, ha, ETH_ALEN);
+
+	for (i = 0; i < stack->num_paths; i++) {
+		path = &stack->path[i];
+		switch (path->type) {
+		case DEV_PATH_ETHERNET:
+		case DEV_PATH_DSA:
+		case DEV_PATH_VLAN:
+		case DEV_PATH_PPPOE:
+			info->indev = path->dev;
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+			if (path->type == DEV_PATH_ETHERNET)
+				break;
+			if (path->type == DEV_PATH_DSA) {
+				i = stack->num_paths;
+				break;
+			}
+
+			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
+			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+				info->indev = NULL;
+				break;
+			}
+			info->outdev = path->dev;
+			info->encap[info->num_encaps].id = path->encap.id;
+			info->encap[info->num_encaps].proto = path->encap.proto;
+			info->num_encaps++;
+			if (path->type == DEV_PATH_PPPOE)
+				memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+			break;
+		case DEV_PATH_BRIDGE:
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+			switch (path->bridge.vlan_mode) {
+			case DEV_PATH_BR_VLAN_UNTAG_HW:
+				info->ingress_vlans |= BIT(info->num_encaps - 1);
+				break;
+			case DEV_PATH_BR_VLAN_TAG:
+				info->encap[info->num_encaps].id = path->bridge.vlan_id;
+				info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+				info->num_encaps++;
+				break;
+			case DEV_PATH_BR_VLAN_UNTAG:
+				info->num_encaps--;
+				break;
+			case DEV_PATH_BR_VLAN_KEEP:
+				break;
+			}
+			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+			break;
+		default:
+			info->indev = NULL;
+			break;
+		}
+	}
+	if (!info->outdev)
+		info->outdev = info->indev;
+
+	info->hw_outdev = info->indev;
+
+	if (nf_flowtable_hw_offload(flowtable) &&
+	    nft_is_valid_ether_device(info->indev))
+		info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+}
+
+static bool nft_flowtable_find_dev(const struct net_device *dev,
+				   struct nft_flowtable *ft)
+{
+	struct nft_hook *hook;
+	bool found = false;
+
+	list_for_each_entry_rcu(hook, &ft->hook_list, list) {
+		if (hook->ops.dev != dev)
+			continue;
+
+		found = true;
+		break;
+	}
+
+	return found;
+}
+
+static void nft_dev_forward_path(struct nf_flow_route *route,
+				 const struct nf_conn *ct,
+				 enum ip_conntrack_dir dir,
+				 struct nft_flowtable *ft)
+{
+	const struct dst_entry *dst = route->tuple[dir].dst;
+	struct net_device_path_stack stack;
+	struct nft_forward_info info = {};
+	unsigned char ha[ETH_ALEN];
+	int i;
+
+	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+		nft_dev_path_info(&stack, &info, ha, &ft->data);
+
+	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
+		return;
+
+	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+	for (i = 0; i < info.num_encaps; i++) {
+		route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+		route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+	}
+	route->tuple[!dir].in.num_encaps = info.num_encaps;
+	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+
+	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+		route->tuple[dir].out.ifindex = info.outdev->ifindex;
+		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
+		route->tuple[dir].xmit_type = info.xmit_type;
+	}
+}
+
 static int nft_flow_route(const struct nft_pktinfo *pkt,
 			  const struct nf_conn *ct,
 			  struct nf_flow_route *route,
-			  enum ip_conntrack_dir dir)
+			  enum ip_conntrack_dir dir,
+			  struct nft_flowtable *ft)
 {
 	struct dst_entry *this_dst = skb_dst(pkt->skb);
 	struct dst_entry *other_dst = NULL;
@@ -44,8 +239,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
 	if (!other_dst)
 		return -ENOENT;
 
-	route->tuple[dir].dst		= this_dst;
-	route->tuple[!dir].dst		= other_dst;
+	nft_default_forward_path(route, this_dst, dir);
+	nft_default_forward_path(route, other_dst, !dir);
+
+	if (route->tuple[dir].xmit_type	== FLOW_OFFLOAD_XMIT_NEIGH &&
+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+		nft_dev_forward_path(route, ct, dir, ft);
+		nft_dev_forward_path(route, ct, !dir, ft);
+	}
 
 	return 0;
 }
@@ -74,8 +275,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	struct nft_flow_offload *priv = nft_expr_priv(expr);
 	struct nf_flowtable *flowtable = &priv->flowtable->data;
 	struct tcphdr _tcph, *tcph = NULL;
+	struct nf_flow_route route = {};
 	enum ip_conntrack_info ctinfo;
-	struct nf_flow_route route;
 	struct flow_offload *flow;
 	enum ip_conntrack_dir dir;
 	struct nf_conn *ct;
@@ -112,7 +313,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 		goto out;
 
 	dir = CTINFO2DIR(ctinfo);
-	if (nft_flow_route(pkt, ct, &route, dir) < 0)
+	if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
 		goto err_flow_route;
 
 	flow = flow_offload_alloc(ct);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 4ed7e52c7012..88deb5b41429 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -497,10 +497,12 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 
 	if (unlikely(packet_length(skb, vport->dev) > mtu &&
 		     !skb_is_gso(skb))) {
-		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
-				     vport->dev->name,
-				     packet_length(skb, vport->dev), mtu);
 		vport->dev->stats.tx_errors++;
+		if (vport->dev->flags & IFF_UP)
+			net_warn_ratelimited("%s: dropped over-mtu packet: "
+					     "%d > %d\n", vport->dev->name,
+					     packet_length(skb, vport->dev),
+					     mtu);
 		goto drop;
 	}
 
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 1eb7495ac5b4..8a930ca6d6b1 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -20,7 +20,7 @@
 struct vport;
 struct vport_parms;
 
-/* The following definitions are for users of the vport subsytem: */
+/* The following definitions are for users of the vport subsystem: */
 
 int ovs_vport_init(void);
 void ovs_vport_exit(void);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e24b2841c643..118d585337d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2057,7 +2057,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
  * and skb->cb are mangled. It works because (and until) packets
  * falling here are owned by current CPU. Output packets are cloned
  * by dev_queue_xmit_nit(), input packets are processed by net_bh
- * sequencially, so that if we return skb to original state on exit,
+ * sequentially, so that if we return skb to original state on exit,
  * we will not harm anyone.
  */
 
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 482c07f2766b..118d5d2a81a0 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/timekeeping.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -356,9 +357,12 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
 #endif
 
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
-			   u32 trunc_size, int in_ifindex, int out_ifindex,
-			   u32 sample_rate)
+			   u32 sample_rate, const struct psample_metadata *md)
 {
+	ktime_t tstamp = ktime_get_real();
+	int out_ifindex = md->out_ifindex;
+	int in_ifindex = md->in_ifindex;
+	u32 trunc_size = md->trunc_size;
 #ifdef CONFIG_INET
 	struct ip_tunnel_info *tun_info;
 #endif
@@ -370,10 +374,15 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 
 	meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
 		   (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
+		   (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
+		   (md->out_tc_occ_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
+		   (md->latency_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
 		   nla_total_size(sizeof(u32)) +	/* sample_rate */
 		   nla_total_size(sizeof(u32)) +	/* orig_size */
 		   nla_total_size(sizeof(u32)) +	/* group_num */
-		   nla_total_size(sizeof(u32));		/* seq */
+		   nla_total_size(sizeof(u32)) +	/* seq */
+		   nla_total_size_64bit(sizeof(u64)) +	/* timestamp */
+		   nla_total_size(sizeof(u16));		/* protocol */
 
 #ifdef CONFIG_INET
 	tun_info = skb_tunnel_info(skb);
@@ -423,6 +432,36 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 	if (unlikely(ret < 0))
 		goto error;
 
+	if (md->out_tc_valid) {
+		ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OUT_TC, md->out_tc);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	if (md->out_tc_occ_valid) {
+		ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_OUT_TC_OCC,
+					md->out_tc_occ, PSAMPLE_ATTR_PAD);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	if (md->latency_valid) {
+		ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_LATENCY,
+					md->latency, PSAMPLE_ATTR_PAD);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_TIMESTAMP,
+				ktime_to_ns(tstamp), PSAMPLE_ATTR_PAD);
+	if (unlikely(ret < 0))
+		goto error;
+
+	ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_PROTO,
+			  be16_to_cpu(skb->protocol));
+	if (unlikely(ret < 0))
+		goto error;
+
 	if (data_len) {
 		int nla_len = nla_total_size(data_len);
 		struct nlattr *nla;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index aba4afe4dfed..4db109fb6ec2 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -722,8 +722,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 		if (msg->msg_name) {
 			if (ipv6_addr_v4mapped(&inc->i_saddr)) {
-				sin = (struct sockaddr_in *)msg->msg_name;
-
 				sin->sin_family = AF_INET;
 				sin->sin_port = inc->i_hdr.h_sport;
 				sin->sin_addr.s_addr =
@@ -731,8 +729,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 				msg->msg_namelen = sizeof(*sin);
 			} else {
-				sin6 = (struct sockaddr_in6 *)msg->msg_name;
-
 				sin6->sin6_family = AF_INET6;
 				sin6->sin6_port = inc->i_hdr.h_sport;
 				sin6->sin6_addr = inc->i_saddr;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 6e35703ff353..c0e04c261a15 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -347,6 +347,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
 				case 1:
 					rose_node->neighbour[1] =
 						rose_node->neighbour[2];
+					break;
 				case 2:
 					break;
 				}
@@ -508,6 +509,7 @@ void rose_rt_device_down(struct net_device *dev)
 					fallthrough;
 				case 1:
 					t->neighbour[1] = t->neighbour[2];
+					break;
 				case 2:
 					break;
 				}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8d8452b1cdd4..0fab8de176d2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -42,6 +42,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
 	[TCA_POLICE_RATE64]     = { .type = NLA_U64 },
 	[TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
+	[TCA_POLICE_PKTRATE64]  = { .type = NLA_U64, .min = 1 },
+	[TCA_POLICE_PKTBURST64] = { .type = NLA_U64, .min = 1 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -61,6 +63,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	u32 index;
 	u64 rate64, prate64;
+	u64 pps, ppsburst;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -142,6 +145,21 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
+	if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
+	    (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
+		NL_SET_ERR_MSG(extack,
+			       "Both or neither packet-per-second burst and rate must be provided");
+		err = -EINVAL;
+		goto failure;
+	}
+
+	if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
+		NL_SET_ERR_MSG(extack,
+			       "packet-per-second and byte-per-second rate limits not allowed in same action");
+		err = -EINVAL;
+		goto failure;
+	}
+
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (unlikely(!new)) {
 		err = -ENOMEM;
@@ -183,6 +201,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_POLICE_AVRATE])
 		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
+	if (tb[TCA_POLICE_PKTRATE64]) {
+		pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
+		ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
+		new->pps_present = true;
+		new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
+		psched_ppscfg_precompute(&new->ppsrate, pps);
+	}
+
 	spin_lock_bh(&police->tcf_lock);
 	spin_lock_bh(&police->tcfp_lock);
 	police->tcfp_t_c = ktime_get_ns();
@@ -217,8 +243,8 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
 	struct tcf_police *police = to_police(a);
+	s64 now, toks, ppstoks = 0, ptoks = 0;
 	struct tcf_police_params *p;
-	s64 now, toks, ptoks = 0;
 	int ret;
 
 	tcf_lastuse_update(&police->tcf_tm);
@@ -236,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
-		if (!p->rate_present) {
+		if (!p->rate_present && !p->pps_present) {
 			ret = p->tcfp_result;
 			goto end;
 		}
@@ -251,14 +277,23 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			ptoks -= (s64)psched_l2t_ns(&p->peak,
 						    qdisc_pkt_len(skb));
 		}
-		toks += police->tcfp_toks;
-		if (toks > p->tcfp_burst)
-			toks = p->tcfp_burst;
-		toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
-		if ((toks|ptoks) >= 0) {
+		if (p->rate_present) {
+			toks += police->tcfp_toks;
+			if (toks > p->tcfp_burst)
+				toks = p->tcfp_burst;
+			toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
+		} else if (p->pps_present) {
+			ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
+			ppstoks += police->tcfp_pkttoks;
+			if (ppstoks > p->tcfp_pkt_burst)
+				ppstoks = p->tcfp_pkt_burst;
+			ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
+		}
+		if ((toks | ptoks | ppstoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
 			police->tcfp_ptoks = ptoks;
+			police->tcfp_pkttoks = ppstoks;
 			spin_unlock_bh(&police->tcfp_lock);
 			ret = p->tcfp_result;
 			goto inc_drops;
@@ -331,6 +366,16 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
 				      TCA_POLICE_PAD))
 			goto nla_put_failure;
 	}
+	if (p->pps_present) {
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+				      police->params->ppsrate.rate_pkts_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+				      PSCHED_NS2TICKS(p->tcfp_pkt_burst),
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if (p->tcfp_result &&
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3ebf9ede3cf1..6a0c16e4351d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -158,10 +158,8 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 {
 	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
+	struct psample_metadata md = {};
 	int retval;
-	int size;
-	int iif;
-	int oif;
 
 	tcf_lastuse_update(&s->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
@@ -172,20 +170,18 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	/* randomly sample packets according to rate */
 	if (psample_group && (prandom_u32() % s->rate == 0)) {
 		if (!skb_at_tc_ingress(skb)) {
-			iif = skb->skb_iif;
-			oif = skb->dev->ifindex;
+			md.in_ifindex = skb->skb_iif;
+			md.out_ifindex = skb->dev->ifindex;
 		} else {
-			iif = skb->dev->ifindex;
-			oif = 0;
+			md.in_ifindex = skb->dev->ifindex;
 		}
 
 		/* on ingress, the mac header gets popped, so push it back */
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_push(skb, skb->mac_len);
 
-		size = s->truncate ? s->trunc_size : skb->len;
-		psample_sample_packet(psample_group, skb, size, iif, oif,
-				      s->rate);
+		md.trunc_size = s->truncate ? s->trunc_size : skb->len;
+		psample_sample_packet(psample_group, skb, s->rate, &md);
 
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_pull(skb, skb->mac_len);
@@ -194,6 +190,16 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	return retval;
 }
 
+static void tcf_sample_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+				    u64 drops, u64 lastuse, bool hw)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct tcf_t *tm = &s->tcf_tm;
+
+	tcf_action_update_stats(a, bytes, packets, drops, hw);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
 static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 			   int bind, int ref)
 {
@@ -280,6 +286,7 @@ static struct tc_action_ops act_sample_ops = {
 	.id	  = TCA_ID_SAMPLE,
 	.owner	  = THIS_MODULE,
 	.act	  = tcf_sample_act,
+	.stats_update = tcf_sample_stats_update,
 	.dump	  = tcf_sample_dump,
 	.init	  = tcf_sample_init,
 	.cleanup  = tcf_sample_cleanup,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 13341e7fb077..d3db70865d66 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3662,6 +3662,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->police.burst = tcf_police_burst(act);
 			entry->police.rate_bytes_ps =
 				tcf_police_rate_bytes_ps(act);
+			entry->police.burst_pkt = tcf_police_burst_pkt(act);
+			entry->police.rate_pkt_ps =
+				tcf_police_rate_pkt_ps(act);
 			entry->police.mtu = tcf_police_tcfp_mtu(act);
 			entry->police.index = act->tcfa_index;
 		} else if (is_tcf_ct(act)) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c69a4ba9c33f..d7869a984881 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -209,16 +209,16 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
-	min_val = htons(filter->key.tp_range.tp_min.dst);
-	max_val = htons(filter->key.tp_range.tp_max.dst);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst);
+	min_val = ntohs(filter->key.tp_range.tp_min.dst);
+	max_val = ntohs(filter->key.tp_range.tp_max.dst);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.dst) < min_val ||
-		    htons(key->tp_range.tp.dst) > max_val)
+		if (ntohs(key->tp_range.tp.dst) < min_val ||
+		    ntohs(key->tp_range.tp.dst) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -232,16 +232,16 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
-	min_val = htons(filter->key.tp_range.tp_min.src);
-	max_val = htons(filter->key.tp_range.tp_max.src);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.src);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.src);
+	min_val = ntohs(filter->key.tp_range.tp_min.src);
+	max_val = ntohs(filter->key.tp_range.tp_max.src);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.src) < min_val ||
-		    htons(key->tp_range.tp.src) > max_val)
+		if (ntohs(key->tp_range.tp.src) < min_val ||
+		    ntohs(key->tp_range.tp.src) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -783,16 +783,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
 
 	if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
-	    htons(key->tp_range.tp_max.dst) <=
-	    htons(key->tp_range.tp_min.dst)) {
+	    ntohs(key->tp_range.tp_max.dst) <=
+	    ntohs(key->tp_range.tp_min.dst)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_DST_MIN],
 				    "Invalid destination port range (min must be strictly smaller than max)");
 		return -EINVAL;
 	}
 	if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
-	    htons(key->tp_range.tp_max.src) <=
-	    htons(key->tp_range.tp_min.src)) {
+	    ntohs(key->tp_range.tp_max.src) <=
+	    ntohs(key->tp_range.tp_min.src)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_SRC_MIN],
 				    "Invalid source port range (min must be strictly smaller than max)");
@@ -1044,8 +1044,8 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
 		return -EINVAL;
 	}
 
-	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
-	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+	key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
+	mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
 
 	*flags_key  = 0;
 	*flags_mask = 0;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 320b3d31fa97..b79a7e27bb31 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -263,7 +263,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		/*
 		 * Step 3+n. If classifier selected a link sharing class,
 		 *	   apply agency specific classifier.
-		 *	   Repeat this procdure until we hit a leaf node.
+		 *	   Repeat this procedure until we hit a leaf node.
 		 */
 		head = cl;
 	}
@@ -859,7 +859,7 @@ cbq_dequeue(struct Qdisc *sch)
 	return NULL;
 }
 
-/* CBQ class maintanance routines */
+/* CBQ class maintenance routines */
 
 static void cbq_adjust_levels(struct cbq_class *this)
 {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93d1489..44991ea726fc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1325,6 +1325,48 @@ void dev_shutdown(struct net_device *dev)
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
 
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate:   Rate to compute reciprocal division values of
+ * @mult:   Multiplier for reciprocal division
+ * @shift:  Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ *  time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen.
+ *
+ * reciprocal_value() is not used here it doesn't handle 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+	u64 factor = NSEC_PER_SEC;
+
+	*mult = 1;
+	*shift = 0;
+
+	if (rate <= 0)
+		return;
+
+	for (;;) {
+		*mult = div64_u64(factor, rate);
+		if (*mult & (1U << 31) || factor & (1ULL << 63))
+			break;
+		factor <<= 1;
+		(*shift)++;
+	}
+}
+
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
 			       const struct tc_ratespec *conf,
 			       u64 rate64)
@@ -1333,34 +1375,17 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 	r->overhead = conf->overhead;
 	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
-	r->mult = 1;
-	/*
-	 * The deal here is to replace a divide by a reciprocal one
-	 * in fast path (a reciprocal divide is a multiply and a shift)
-	 *
-	 * Normal formula would be :
-	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
-	 *
-	 * We compute mult/shift to use instead :
-	 *  time_in_ns = (len * mult) >> shift;
-	 *
-	 * We try to get the highest possible mult value for accuracy,
-	 * but have to make sure no overflows will ever happen.
-	 */
-	if (r->rate_bytes_ps > 0) {
-		u64 factor = NSEC_PER_SEC;
-
-		for (;;) {
-			r->mult = div64_u64(factor, r->rate_bytes_ps);
-			if (r->mult & (1U << 31) || factor & (1ULL << 63))
-				break;
-			factor <<= 1;
-			r->shift++;
-		}
-	}
+	psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
 
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
+{
+	r->rate_pkts_ps = pktrate64;
+	psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
+}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
+
 static void mini_qdisc_rcu_func(struct rcu_head *head)
 {
 }
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..922ed6b91abb 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -411,18 +411,10 @@ done:
 	return txtime;
 }
 
-static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-			  struct sk_buff **to_free)
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+			      struct Qdisc *child, struct sk_buff **to_free)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct Qdisc *child;
-	int queue;
-
-	queue = skb_get_queue_mapping(skb);
-
-	child = q->qdiscs[queue];
-	if (unlikely(!child))
-		return qdisc_drop(skb, sch, to_free);
 
 	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
 		if (!is_valid_interval(skb, sch))
@@ -439,6 +431,58 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return qdisc_enqueue(skb, child, to_free);
 }
 
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	int queue;
+
+	queue = skb_get_queue_mapping(skb);
+
+	child = q->qdiscs[queue];
+	if (unlikely(!child))
+		return qdisc_drop(skb, sch, to_free);
+
+	/* Large packets might not be transmitted when the transmission duration
+	 * exceeds any configured interval. Therefore, segment the skb into
+	 * smaller chunks. Skip it for the full offload case, as the driver
+	 * and/or the hardware is expected to handle this.
+	 */
+	if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+		netdev_features_t features = netif_skb_features(skb);
+		struct sk_buff *segs, *nskb;
+		int ret;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		skb_list_walk_safe(segs, segs, nskb) {
+			skb_mark_not_on_list(segs);
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			slen += segs->len;
+
+			ret = taprio_enqueue_one(segs, sch, child, to_free);
+			if (ret != NET_XMIT_SUCCESS) {
+				if (net_xmit_drop_count(ret))
+					qdisc_qstats_drop(sch);
+			} else {
+				numsegs++;
+			}
+		}
+
+		if (numsegs > 1)
+			qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+		consume_skb(skb);
+
+		return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+	}
+
+	return taprio_enqueue_one(skb, sch, child, to_free);
+}
+
 static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index abe29d1aa23a..fd0796269eed 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 1a11831bef62..0772cfadaa0d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,50 @@
 #include <net/netns/generic.h>
 #include "core.h"
 
+/* Struct tipc_uaddr: internal version of struct sockaddr_tipc.
+ * Must be kept aligned both regarding field positions and size.
+ */
+struct tipc_uaddr {
+	unsigned short family;
+	unsigned char addrtype;
+	signed char scope;
+	union {
+		struct {
+			struct tipc_service_addr sa;
+			u32 lookup_node;
+		};
+		struct tipc_service_range sr;
+		struct tipc_socket_addr sk;
+	};
+};
+
+static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope,
+			      u32 type, u32 lower, u32 upper)
+{
+	ua->family = AF_TIPC;
+	ua->addrtype = atype;
+	ua->scope = scope;
+	ua->sr.type = type;
+	ua->sr.lower = lower;
+	ua->sr.upper = upper;
+}
+
+static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len)
+{
+	u32 atype;
+
+	if (len < sizeof(struct sockaddr_tipc))
+		return false;
+	atype = ua->addrtype;
+	if (ua->family != AF_TIPC)
+		return false;
+	if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR)
+		return true;
+	if (atype == TIPC_SERVICE_RANGE)
+		return ua->sr.upper >= ua->sr.lower;
+	return false;
+}
+
 static inline u32 tipc_own_addr(struct net *net)
 {
 	return tipc_net(net)->node_addr;
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f4fca8f7f63f..6f64acef73dc 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -317,7 +317,7 @@ static int tipc_aead_key_generate(struct tipc_aead_key *skey);
 
 #define tipc_aead_rcu_replace(rcu_ptr, ptr, lock)			\
 do {									\
-	typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr),	\
+	struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr),	\
 						lockdep_is_held(lock));	\
 	rcu_assign_pointer((rcu_ptr), (ptr));				\
 	tipc_aead_put(__tmp);						\
@@ -798,7 +798,7 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
 	ehdr = (struct tipc_ehdr *)skb->data;
 	salt = aead->salt;
 	if (aead->mode == CLUSTER_KEY)
-		salt ^= ehdr->addr; /* __be32 */
+		salt ^= __be32_to_cpu(ehdr->addr);
 	else if (__dnode)
 		salt ^= tipc_node_get_addr(__dnode);
 	memcpy(iv, &salt, 4);
@@ -929,7 +929,7 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
 	ehdr = (struct tipc_ehdr *)skb->data;
 	salt = aead->salt;
 	if (aead->mode == CLUSTER_KEY)
-		salt ^= ehdr->addr; /* __be32 */
+		salt ^= __be32_to_cpu(ehdr->addr);
 	else if (ehdr->destined)
 		salt ^= tipc_own_addr(net);
 	memcpy(iv, &salt, 4);
@@ -1946,16 +1946,16 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
 			goto rcv;
 		}
 		tipc_aead_put(aead);
-		aead = tipc_aead_get(tmp);
+		aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp);
 	}
 
 	if (unlikely(err)) {
-		tipc_aead_users_dec(aead, INT_MIN);
+		tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN);
 		goto free_skb;
 	}
 
 	/* Set the RX key's user */
-	tipc_aead_users_set(aead, 1);
+	tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
 
 	/* Mark this point, RX works */
 	rx->timer1 = jiffies;
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 48fac3b17e40..407619697292 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -104,6 +104,36 @@ static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
 
 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
 
+static inline u16 mon_cpu_to_le16(u16 val)
+{
+	return (__force __u16)htons(val);
+}
+
+static inline u32 mon_cpu_to_le32(u32 val)
+{
+	return (__force __u32)htonl(val);
+}
+
+static inline u64 mon_cpu_to_le64(u64 val)
+{
+	return (__force __u64)cpu_to_be64(val);
+}
+
+static inline u16 mon_le16_to_cpu(u16 val)
+{
+	return ntohs((__force __be16)val);
+}
+
+static inline u32 mon_le32_to_cpu(u32 val)
+{
+	return ntohl((__force __be32)val);
+}
+
+static inline u64 mon_le64_to_cpu(u64 val)
+{
+	return be64_to_cpu((__force __be64)val);
+}
+
 /* dom_rec_len(): actual length of domain record for transport
  */
 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
@@ -260,16 +290,16 @@ static void mon_update_local_domain(struct tipc_monitor *mon)
 		diff |= dom->members[i] != peer->addr;
 		dom->members[i] = peer->addr;
 		map_set(&dom->up_map, i, peer->is_up);
-		cache->members[i] = htonl(peer->addr);
+		cache->members[i] = mon_cpu_to_le32(peer->addr);
 	}
 	diff |= dom->up_map != prev_up_map;
 	if (!diff)
 		return;
 	dom->gen = ++mon->dom_gen;
-	cache->len = htons(dom->len);
-	cache->gen = htons(dom->gen);
-	cache->member_cnt = htons(member_cnt);
-	cache->up_map = cpu_to_be64(dom->up_map);
+	cache->len = mon_cpu_to_le16(dom->len);
+	cache->gen = mon_cpu_to_le16(dom->gen);
+	cache->member_cnt = mon_cpu_to_le16(member_cnt);
+	cache->up_map = mon_cpu_to_le64(dom->up_map);
 	mon_apply_domain(mon, self);
 }
 
@@ -455,10 +485,11 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 	struct tipc_mon_domain dom_bef;
 	struct tipc_mon_domain *dom;
 	struct tipc_peer *peer;
-	u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+	u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt);
 	int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
-	u16 new_gen = ntohs(arrv_dom->gen);
-	u16 acked_gen = ntohs(arrv_dom->ack_gen);
+	u16 new_gen = mon_le16_to_cpu(arrv_dom->gen);
+	u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen);
+	u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len);
 	bool probing = state->probing;
 	int i, applied_bef;
 
@@ -469,7 +500,7 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 		return;
 	if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
 		return;
-	if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
+	if (dlen < new_dlen || arrv_dlen != new_dlen)
 		return;
 
 	/* Synch generation numbers with peer if link just came up */
@@ -517,9 +548,9 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 	dom->len = new_dlen;
 	dom->gen = new_gen;
 	dom->member_cnt = new_member_cnt;
-	dom->up_map = be64_to_cpu(arrv_dom->up_map);
+	dom->up_map = mon_le64_to_cpu(arrv_dom->up_map);
 	for (i = 0; i < new_member_cnt; i++)
-		dom->members[i] = ntohl(arrv_dom->members[i]);
+		dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]);
 
 	/* Update peers affected by this domain record */
 	applied_bef = peer->applied;
@@ -548,19 +579,19 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
 	if (likely(state->acked_gen == gen)) {
 		len = dom_rec_len(dom, 0);
 		*dlen = len;
-		dom->len = htons(len);
-		dom->gen = htons(gen);
-		dom->ack_gen = htons(state->peer_gen);
+		dom->len = mon_cpu_to_le16(len);
+		dom->gen = mon_cpu_to_le16(gen);
+		dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
 		dom->member_cnt = 0;
 		return;
 	}
 	/* Send the full record */
 	read_lock_bh(&mon->lock);
-	len = ntohs(mon->cache.len);
+	len = mon_le16_to_cpu(mon->cache.len);
 	*dlen = len;
 	memcpy(data, &mon->cache, len);
 	read_unlock_bh(&mon->lock);
-	dom->ack_gen = htons(state->peer_gen);
+	dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
 }
 
 void tipc_mon_get_state(struct net *net, u32 addr,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e9263280a2d4..3f0a25345a7c 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -707,8 +707,11 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 {
 	struct tipc_msg *msg = buf_msg(skb);
-	u32 dport, dnode;
-	u32 onode = tipc_own_addr(net);
+	u32 scope = msg_lookup_scope(msg);
+	u32 self = tipc_own_addr(net);
+	u32 inst = msg_nameinst(msg);
+	struct tipc_socket_addr sk;
+	struct tipc_uaddr ua;
 
 	if (!msg_isdata(msg))
 		return false;
@@ -722,16 +725,16 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	msg = buf_msg(skb);
 	if (msg_reroute_cnt(msg))
 		return false;
-	dnode = tipc_scope2node(net, msg_lookup_scope(msg));
-	dport = tipc_nametbl_translate(net, msg_nametype(msg),
-				       msg_nameinst(msg), &dnode);
-	if (!dport)
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
+		   msg_nametype(msg), inst, inst);
+	sk.node = tipc_scope2node(net, scope);
+	if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
 		return false;
 	msg_incr_reroute_cnt(msg);
-	if (dnode != onode)
-		msg_set_prevnode(msg, onode);
-	msg_set_destnode(msg, dnode);
-	msg_set_destport(msg, dport);
+	if (sk.node != self)
+		msg_set_prevnode(msg, self);
+	msg_set_destnode(msg, sk.node);
+	msg_set_destport(msg, sk.ref);
 	*err = TIPC_OK;
 
 	return true;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6cf57c3bfa27..bda902caa814 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,8 +1,9 @@
 /*
  * net/tipc/name_distr.c: TIPC name distribution code
  *
- * Copyright (c) 2000-2006, 2014, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2019, Ericsson AB
  * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,10 +56,10 @@ struct distr_queue_item {
  */
 static void publ_to_item(struct distr_item *i, struct publication *p)
 {
-	i->type = htonl(p->type);
-	i->lower = htonl(p->lower);
-	i->upper = htonl(p->upper);
-	i->port = htonl(p->port);
+	i->type = htonl(p->sr.type);
+	i->lower = htonl(p->sr.lower);
+	i->upper = htonl(p->sr.upper);
+	i->port = htonl(p->sk.ref);
 	i->key = htonl(p->key);
 }
 
@@ -90,20 +91,20 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
 /**
  * tipc_named_publish - tell other nodes about a new publication by this node
  * @net: the associated network namespace
- * @publ: the new publication
+ * @p: the new publication
  */
-struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *p)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct distr_item *item;
 	struct sk_buff *skb;
 
-	if (publ->scope == TIPC_NODE_SCOPE) {
-		list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
+	if (p->scope == TIPC_NODE_SCOPE) {
+		list_add_tail_rcu(&p->binding_node, &nt->node_scope);
 		return NULL;
 	}
 	write_lock_bh(&nt->cluster_scope_lock);
-	list_add_tail(&publ->binding_node, &nt->cluster_scope);
+	list_add_tail(&p->binding_node, &nt->cluster_scope);
 	write_unlock_bh(&nt->cluster_scope_lock);
 	skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
 	if (!skb) {
@@ -113,25 +114,25 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
 	msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
 	msg_set_non_legacy(buf_msg(skb));
 	item = (struct distr_item *)msg_data(buf_msg(skb));
-	publ_to_item(item, publ);
+	publ_to_item(item, p);
 	return skb;
 }
 
 /**
  * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
  * @net: the associated network namespace
- * @publ: the withdrawn publication
+ * @p: the withdrawn publication
  */
-struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct distr_item *item;
 	struct sk_buff *skb;
 
 	write_lock_bh(&nt->cluster_scope_lock);
-	list_del(&publ->binding_node);
+	list_del(&p->binding_node);
 	write_unlock_bh(&nt->cluster_scope_lock);
-	if (publ->scope == TIPC_NODE_SCOPE)
+	if (p->scope == TIPC_NODE_SCOPE)
 		return NULL;
 
 	skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
@@ -142,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 	msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
 	msg_set_non_legacy(buf_msg(skb));
 	item = (struct distr_item *)msg_data(buf_msg(skb));
-	publ_to_item(item, publ);
+	publ_to_item(item, p);
 	return skb;
 }
 
@@ -233,33 +234,27 @@ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
 /**
  * tipc_publ_purge - remove publication associated with a failed node
  * @net: the associated network namespace
- * @publ: the publication to remove
+ * @p: the publication to remove
  * @addr: failed node's address
  *
  * Invoked for each publication issued by a newly failed node.
  * Removes publication structure from name table & deletes it.
  */
-static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
+static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
-	struct publication *p;
+	struct publication *_p;
+	struct tipc_uaddr ua;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type,
+		   p->sr.lower, p->sr.upper);
 	spin_lock_bh(&tn->nametbl_lock);
-	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper,
-				     publ->node, publ->key);
-	if (p)
-		tipc_node_unsubscribe(net, &p->binding_node, addr);
+	_p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key);
+	if (_p)
+		tipc_node_unsubscribe(net, &_p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
-
-	if (p != publ) {
-		pr_err("Unable to remove publication from failed node\n"
-		       " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
-		       publ->type, publ->lower, publ->node, publ->port,
-		       publ->key);
-	}
-
-	if (p)
-		kfree_rcu(p, rcu);
+	if (_p)
+		kfree_rcu(_p, rcu);
 }
 
 void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
@@ -293,30 +288,30 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 				u32 node, u32 dtype)
 {
 	struct publication *p = NULL;
-	u32 lower = ntohl(i->lower);
-	u32 upper = ntohl(i->upper);
-	u32 type = ntohl(i->type);
-	u32 port = ntohl(i->port);
+	struct tipc_socket_addr sk;
+	struct tipc_uaddr ua;
 	u32 key = ntohl(i->key);
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+		   ntohl(i->type), ntohl(i->lower), ntohl(i->upper));
+	sk.ref = ntohl(i->port);
+	sk.node = node;
+
 	if (dtype == PUBLICATION) {
-		p = tipc_nametbl_insert_publ(net, type, lower, upper,
-					     TIPC_CLUSTER_SCOPE, node,
-					     port, key);
+		p = tipc_nametbl_insert_publ(net, &ua, &sk, key);
 		if (p) {
 			tipc_node_subscribe(net, &p->binding_node, node);
 			return true;
 		}
 	} else if (dtype == WITHDRAWAL) {
-		p = tipc_nametbl_remove_publ(net, type, lower,
-					     upper, node, key);
+		p = tipc_nametbl_remove_publ(net, &ua, &sk, key);
 		if (p) {
 			tipc_node_unsubscribe(net, &p->binding_node, node);
 			kfree_rcu(p, rcu);
 			return true;
 		}
-		pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
-				    type, lower, node);
+		pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
+				    ua.sr.type, ua.sr.lower, node);
 	} else {
 		pr_warn("Unrecognized name table message received\n");
 	}
@@ -410,15 +405,15 @@ void tipc_named_reinit(struct net *net)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
-	struct publication *publ;
+	struct publication *p;
 	u32 self = tipc_own_addr(net);
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
-		publ->node = self;
-	list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
-		publ->node = self;
+	list_for_each_entry_rcu(p, &nt->node_scope, binding_node)
+		p->sk.node = self;
+	list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node)
+		p->sk.node = self;
 	nt->rc_dests = 0;
 	spin_unlock_bh(&tn->nametbl_lock);
 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ee5ac40ea2b6..6db9f9e7c0ac 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -222,59 +222,57 @@ static int hash(int x)
 
 /**
  * tipc_publ_create - create a publication structure
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @scope: publication scope
- * @node: network address of publishing socket
- * @port: publishing port
+ * @ua: the service range the user is binding to
+ * @sk: the address of the socket that is bound
  * @key: publication key
  */
-static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
-					    u32 scope, u32 node, u32 port,
+static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
+					    struct tipc_socket_addr *sk,
 					    u32 key)
 {
-	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
+	struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
 
-	if (!publ)
+	if (!p)
 		return NULL;
 
-	publ->type = type;
-	publ->lower = lower;
-	publ->upper = upper;
-	publ->scope = scope;
-	publ->node = node;
-	publ->port = port;
-	publ->key = key;
-	INIT_LIST_HEAD(&publ->binding_sock);
-	INIT_LIST_HEAD(&publ->binding_node);
-	INIT_LIST_HEAD(&publ->local_publ);
-	INIT_LIST_HEAD(&publ->all_publ);
-	INIT_LIST_HEAD(&publ->list);
-	return publ;
+	p->sr = ua->sr;
+	p->sk = *sk;
+	p->scope = ua->scope;
+	p->key = key;
+	INIT_LIST_HEAD(&p->binding_sock);
+	INIT_LIST_HEAD(&p->binding_node);
+	INIT_LIST_HEAD(&p->local_publ);
+	INIT_LIST_HEAD(&p->all_publ);
+	INIT_LIST_HEAD(&p->list);
+	return p;
 }
 
 /**
  * tipc_service_create - create a service structure for the specified 'type'
- * @type: service type
- * @hd: name_table services list
+ * @net: network namespace
+ * @ua: address representing the service to be bound
  *
  * Allocates a single range structure and sets it to all 0's.
  */
-static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
+static struct tipc_service *tipc_service_create(struct net *net,
+						struct tipc_uaddr *ua)
 {
-	struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_service *service;
+	struct hlist_head *hd;
 
+	service = kzalloc(sizeof(*service), GFP_ATOMIC);
 	if (!service) {
 		pr_warn("Service creation failed, no memory\n");
 		return NULL;
 	}
 
 	spin_lock_init(&service->lock);
-	service->type = type;
+	service->type = ua->sr.type;
 	service->ranges = RB_ROOT;
 	INIT_HLIST_NODE(&service->service_list);
 	INIT_LIST_HEAD(&service->subscriptions);
+	hd = &nt->services[hash(ua->sr.type)];
 	hlist_add_head_rcu(&service->service_list, hd);
 	return service;
 }
@@ -282,13 +280,13 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
 /*  tipc_service_find_range - find service range matching publication parameters
  */
 static struct service_range *tipc_service_find_range(struct tipc_service *sc,
-						     u32 lower, u32 upper)
+						     struct tipc_uaddr *ua)
 {
 	struct service_range *sr;
 
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		/* Look for exact match */
-		if (sr->lower == lower && sr->upper == upper)
+		if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper)
 			return sr;
 	}
 
@@ -296,10 +294,12 @@ static struct service_range *tipc_service_find_range(struct tipc_service *sc,
 }
 
 static struct service_range *tipc_service_create_range(struct tipc_service *sc,
-						       u32 lower, u32 upper)
+						       struct publication *p)
 {
 	struct rb_node **n, *parent = NULL;
 	struct service_range *sr;
+	u32 lower = p->sr.lower;
+	u32 upper = p->sr.upper;
 
 	n = &sc->ranges.rb_node;
 	while (*n) {
@@ -327,64 +327,68 @@ static struct service_range *tipc_service_create_range(struct tipc_service *sc,
 	return sr;
 }
 
-static struct publication *tipc_service_insert_publ(struct net *net,
-						    struct tipc_service *sc,
-						    u32 type, u32 lower,
-						    u32 upper, u32 scope,
-						    u32 node, u32 port,
-						    u32 key)
+static bool tipc_service_insert_publ(struct net *net,
+				     struct tipc_service *sc,
+				     struct publication *p)
 {
 	struct tipc_subscription *sub, *tmp;
 	struct service_range *sr;
-	struct publication *p;
+	struct publication *_p;
+	u32 node = p->sk.node;
 	bool first = false;
+	bool res = false;
+	u32 key = p->key;
 
-	sr = tipc_service_create_range(sc, lower, upper);
+	spin_lock_bh(&sc->lock);
+	sr = tipc_service_create_range(sc, p);
 	if (!sr)
-		goto  err;
+		goto  exit;
 
 	first = list_empty(&sr->all_publ);
 
 	/* Return if the publication already exists */
-	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->key == key && (!p->node || p->node == node))
-			return NULL;
+	list_for_each_entry(_p, &sr->all_publ, all_publ) {
+		if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) {
+			pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n",
+				 p->sr.type, p->sr.lower, p->sr.upper,
+				 node, p->sk.ref, key);
+			goto exit;
+		}
 	}
 
-	/* Create and insert publication */
-	p = tipc_publ_create(type, lower, upper, scope, node, port, key);
-	if (!p)
-		goto err;
-	/* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
-	p->id = sc->publ_cnt++;
-	if (in_own_node(net, node))
+	if (in_own_node(net, p->sk.node))
 		list_add(&p->local_publ, &sr->local_publ);
 	list_add(&p->all_publ, &sr->all_publ);
+	p->id = sc->publ_cnt++;
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED,
-					p->port, p->node, p->scope, first);
+		tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first);
 	}
-	return p;
-err:
-	pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper);
-	return NULL;
+	res = true;
+exit:
+	if (!res)
+		pr_warn("Failed to bind to %u,%u,%u\n",
+			p->sr.type, p->sr.lower, p->sr.upper);
+	spin_unlock_bh(&sc->lock);
+	return res;
 }
 
 /**
  * tipc_service_remove_publ - remove a publication from a service
- * @sr: service_range to remove publication from
- * @node: target node
+ * @r: service_range to remove publication from
+ * @sk: address publishing socket
  * @key: target publication key
  */
-static struct publication *tipc_service_remove_publ(struct service_range *sr,
-						    u32 node, u32 key)
+static struct publication *tipc_service_remove_publ(struct service_range *r,
+						    struct tipc_socket_addr *sk,
+						    u32 key)
 {
 	struct publication *p;
+	u32 node = sk->node;
 
-	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->key != key || (node && node != p->node))
+	list_for_each_entry(p, &r->all_publ, all_publ) {
+		if (p->key != key || (node && node != p->sk.node))
 			continue;
 		list_del(&p->all_publ);
 		list_del(&p->local_publ);
@@ -417,17 +421,14 @@ static int tipc_publ_sort(void *priv, struct list_head *a,
 static void tipc_service_subscribe(struct tipc_service *service,
 				   struct tipc_subscription *sub)
 {
-	struct tipc_subscr *sb = &sub->evt.s;
 	struct publication *p, *first, *tmp;
 	struct list_head publ_list;
 	struct service_range *sr;
-	struct tipc_service_range r;
-	u32 filter;
+	u32 filter, lower, upper;
 
-	r.type = tipc_sub_read(sb, seq.type);
-	r.lower = tipc_sub_read(sb, seq.lower);
-	r.upper = tipc_sub_read(sb, seq.upper);
-	filter = tipc_sub_read(sb, filter);
+	filter = sub->s.filter;
+	lower = sub->s.seq.lower;
+	upper = sub->s.seq.upper;
 
 	tipc_sub_get(sub);
 	list_add(&sub->service_list, &service->subscriptions);
@@ -436,7 +437,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
 		return;
 
 	INIT_LIST_HEAD(&publ_list);
-	service_range_foreach_match(sr, service, r.lower, r.upper) {
+	service_range_foreach_match(sr, service, lower, upper) {
 		first = NULL;
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
 			if (filter & TIPC_SUB_PORTS)
@@ -452,80 +453,74 @@ static void tipc_service_subscribe(struct tipc_service *service,
 	/* Sort the publications before reporting */
 	list_sort(NULL, &publ_list, tipc_publ_sort);
 	list_for_each_entry_safe(p, tmp, &publ_list, list) {
-		tipc_sub_report_overlap(sub, p->lower, p->upper,
-					TIPC_PUBLISHED, p->port, p->node,
-					p->scope, true);
+		tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true);
 		list_del_init(&p->list);
 	}
 }
 
-static struct tipc_service *tipc_service_find(struct net *net, u32 type)
+static struct tipc_service *tipc_service_find(struct net *net,
+					      struct tipc_uaddr *ua)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct hlist_head *service_head;
 	struct tipc_service *service;
 
-	service_head = &nt->services[hash(type)];
+	service_head = &nt->services[hash(ua->sr.type)];
 	hlist_for_each_entry_rcu(service, service_head, service_list) {
-		if (service->type == type)
+		if (service->type == ua->sr.type)
 			return service;
 	}
 	return NULL;
 };
 
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 scope, u32 node,
-					     u32 port, u32 key)
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key)
 {
-	struct name_table *nt = tipc_name_table(net);
 	struct tipc_service *sc;
 	struct publication *p;
 
-	if (scope > TIPC_NODE_SCOPE || lower > upper) {
-		pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
-			 type, lower, upper, scope);
-		return NULL;
-	}
-	sc = tipc_service_find(net, type);
-	if (!sc)
-		sc = tipc_service_create(type, &nt->services[hash(type)]);
-	if (!sc)
+	p = tipc_publ_create(ua, sk, key);
+	if (!p)
 		return NULL;
 
-	spin_lock_bh(&sc->lock);
-	p = tipc_service_insert_publ(net, sc, type, lower, upper,
-				     scope, node, port, key);
-	spin_unlock_bh(&sc->lock);
-	return p;
+	sc = tipc_service_find(net, ua);
+	if (!sc)
+		sc = tipc_service_create(net, ua);
+	if (sc && tipc_service_insert_publ(net, sc, p))
+		return p;
+	kfree(p);
+	return NULL;
 }
 
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 node, u32 key)
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key)
 {
-	struct tipc_service *sc = tipc_service_find(net, type);
 	struct tipc_subscription *sub, *tmp;
-	struct service_range *sr = NULL;
 	struct publication *p = NULL;
+	struct service_range *sr;
+	struct tipc_service *sc;
 	bool last;
 
+	sc = tipc_service_find(net, ua);
 	if (!sc)
-		return NULL;
+		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	sr = tipc_service_find_range(sc, lower, upper);
+	sr = tipc_service_find_range(sc, ua);
 	if (!sr)
-		goto exit;
-	p = tipc_service_remove_publ(sr, node, key);
+		goto unlock;
+	p = tipc_service_remove_publ(sr, sk, key);
 	if (!p)
-		goto exit;
+		goto unlock;
 
 	/* Notify any waiting subscriptions */
 	last = list_empty(&sr->all_publ);
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
-					p->port, node, p->scope, last);
+		tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last);
 	}
 
 	/* Remove service range item if this was its last publication */
@@ -534,77 +529,83 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 		kfree(sr);
 	}
 
-	/* Delete service item if this no more publications and subscriptions */
+	/* Delete service item if no more publications and subscriptions */
 	if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
 		hlist_del_init_rcu(&sc->service_list);
 		kfree_rcu(sc, rcu);
 	}
-exit:
+unlock:
 	spin_unlock_bh(&sc->lock);
+exit:
+	if (!p) {
+		pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n",
+		       ua->sr.type, ua->sr.lower, ua->sr.upper,
+		       sk->node, sk->ref, key);
+	}
 	return p;
 }
 
 /**
- * tipc_nametbl_translate - perform service instance to socket translation
+ * tipc_nametbl_lookup_anycast - perform service instance to socket translation
  * @net: network namespace
- * @type: message type
- * @instance: message instance
- * @dnode: the search domain used during translation
+ * @ua: service address to look up
+ * @sk: address to socket we want to find
  *
+ * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be
+ * performed, which may not be this one.
  * On exit:
- * - if translation is deferred to another node, leave 'dnode' unchanged and
- * return 0
- * - if translation is attempted and succeeds, set 'dnode' to the publishing
- * node and return the published (non-zero) port number
- * - if translation is attempted and fails, set 'dnode' to 0 and return 0
+ * - If lookup is deferred to another node, leave 'sk->node' unchanged and
+ *   return 'true'.
+ * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which
+ *   represent the bound socket and return 'true'.
+ * - If lookup fails, return 'false'
  *
  * Note that for legacy users (node configured with Z.C.N address format) the
- * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
+ * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0
  * we must look in the local binding list first
  */
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
+bool tipc_nametbl_lookup_anycast(struct net *net,
+				 struct tipc_uaddr *ua,
+				 struct tipc_socket_addr *sk)
 {
 	struct tipc_net *tn = tipc_net(net);
 	bool legacy = tn->legacy_addr_format;
 	u32 self = tipc_own_addr(net);
-	struct service_range *sr;
+	u32 inst = ua->sa.instance;
+	struct service_range *r;
 	struct tipc_service *sc;
-	struct list_head *list;
 	struct publication *p;
-	u32 port = 0;
-	u32 node = 0;
+	struct list_head *l;
+	bool res = false;
 
-	if (!tipc_in_scope(legacy, *dnode, self))
-		return 0;
+	if (!tipc_in_scope(legacy, sk->node, self))
+		return true;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (unlikely(!sc))
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, instance, instance) {
+	service_range_foreach_match(r, sc, inst, inst) {
 		/* Select lookup algo: local, closest-first or round-robin */
-		if (*dnode == self) {
-			list = &sr->local_publ;
-			if (list_empty(list))
+		if (sk->node == self) {
+			l = &r->local_publ;
+			if (list_empty(l))
 				continue;
-			p = list_first_entry(list, struct publication,
-					     local_publ);
-			list_move_tail(&p->local_publ, &sr->local_publ);
-		} else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
-			list = &sr->local_publ;
-			p = list_first_entry(list, struct publication,
-					     local_publ);
-			list_move_tail(&p->local_publ, &sr->local_publ);
+			p = list_first_entry(l, struct publication, local_publ);
+			list_move_tail(&p->local_publ, &r->local_publ);
+		} else if (legacy && !sk->node && !list_empty(&r->local_publ)) {
+			l = &r->local_publ;
+			p = list_first_entry(l, struct publication, local_publ);
+			list_move_tail(&p->local_publ, &r->local_publ);
 		} else {
-			list = &sr->all_publ;
-			p = list_first_entry(list, struct publication,
-					     all_publ);
-			list_move_tail(&p->all_publ, &sr->all_publ);
+			l = &r->all_publ;
+			p = list_first_entry(l, struct publication, all_publ);
+			list_move_tail(&p->all_publ, &r->all_publ);
 		}
-		port = p->port;
-		node = p->node;
+		*sk = p->sk;
+		res = true;
 		/* Todo: as for legacy, pick the first matching range only, a
 		 * "true" round-robin will be performed as needed.
 		 */
@@ -614,40 +615,45 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
 
 exit:
 	rcu_read_unlock();
-	*dnode = node;
-	return port;
+	return res;
 }
 
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
-			 struct list_head *dsts, int *dstcnt, u32 exclude,
-			 bool all)
+/* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group
+ * Returns a list of one (== group anycast) or more (== group multicast)
+ * destination socket/node pairs matching the given address.
+ * The requester may or may not want to exclude himself from the list.
+ */
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+			       struct list_head *dsts, int *dstcnt,
+			       u32 exclude, bool mcast)
 {
 	u32 self = tipc_own_addr(net);
+	u32 inst = ua->sa.instance;
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
 
 	*dstcnt = 0;
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (unlikely(!sc))
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
 
 	/* Todo: a full search i.e. service_range_foreach_match() instead? */
-	sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
+	sr = service_range_match_first(sc->ranges.rb_node, inst, inst);
 	if (!sr)
 		goto no_match;
 
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->scope != scope)
+		if (p->scope != ua->scope)
 			continue;
-		if (p->port == exclude && p->node == self)
+		if (p->sk.ref == exclude && p->sk.node == self)
 			continue;
-		tipc_dest_push(dsts, p->node, p->port);
+		tipc_dest_push(dsts, p->sk.node, p->sk.ref);
 		(*dstcnt)++;
-		if (all)
+		if (mcast)
 			continue;
 		list_move_tail(&p->all_publ, &sr->all_publ);
 		break;
@@ -659,23 +665,29 @@ exit:
 	return !list_empty(dsts);
 }
 
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			    u32 scope, bool exact, struct list_head *dports)
+/* tipc_nametbl_lookup_mcast_sockets(): look up node local destinaton sockets
+ *                                      matching the given address
+ * Used on nodes which have received a multicast/broadcast message
+ * Returns a list of local sockets
+ */
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+				       bool exact, struct list_head *dports)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
+	u32 scope = ua->scope;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		list_for_each_entry(p, &sr->local_publ, local_publ) {
 			if (p->scope == scope || (!exact && p->scope < scope))
-				tipc_dest_push(dports, 0, p->port);
+				tipc_dest_push(dports, 0, p->sk.ref);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -683,26 +695,27 @@ exit:
 	rcu_read_unlock();
 }
 
-/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
- * - Creates list of nodes that overlap the given multicast address
- * - Determines if any node local destinations overlap
+/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching
+ *                                    the given address. Used in sending node.
+ * Used on nodes which are sending out a multicast/broadcast message
+ * Returns a list of nodes, including own node if applicable
  */
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, struct tipc_nlist *nodes)
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+				     struct tipc_nlist *nodes)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
-			tipc_nlist_add(nodes, p->node);
+			tipc_nlist_add(nodes, p->sk.node);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -713,7 +726,7 @@ exit:
 /* tipc_nametbl_build_group - build list of communication group members
  */
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
-			      u32 type, u32 scope)
+			      struct tipc_uaddr *ua)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
@@ -721,7 +734,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	struct rb_node *n;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
@@ -729,9 +742,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
 		sr = container_of(n, struct service_range, tree_node);
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
-			if (p->scope != scope)
+			if (p->scope != ua->scope)
 				continue;
-			tipc_group_add_member(grp, p->node, p->port, p->lower);
+			tipc_group_add_member(grp, p->sk.node, p->sk.ref,
+					      p->sr.lower);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -741,9 +755,8 @@ exit:
 
 /* tipc_nametbl_publish - add service binding to name table
  */
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port,
-					 u32 key)
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+					 struct tipc_socket_addr *sk, u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
@@ -758,8 +771,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 		goto exit;
 	}
 
-	p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
-				     tipc_own_addr(net), port, key);
+	p = tipc_nametbl_insert_publ(net, ua, sk, key);
 	if (p) {
 		nt->local_publ_count++;
 		skb = tipc_named_publish(net, p);
@@ -777,41 +789,33 @@ exit:
 /**
  * tipc_nametbl_withdraw - withdraw a service binding
  * @net: network namespace
- * @type: service type
- * @lower: service range lower bound
- * @upper: service range upper bound
+ * @ua: service address/range being unbound
+ * @sk: address of the socket being unbound from
  * @key: target publication key
  */
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
-			  u32 upper, u32 key)
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+			   struct tipc_socket_addr *sk, u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
-	u32 self = tipc_own_addr(net);
 	struct sk_buff *skb = NULL;
 	struct publication *p;
 	u32 rc_dests;
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key);
+	p = tipc_nametbl_remove_publ(net, ua, sk, key);
 	if (p) {
 		nt->local_publ_count--;
 		skb = tipc_named_withdraw(net, p);
 		list_del_init(&p->binding_sock);
 		kfree_rcu(p, rcu);
-	} else {
-		pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
-		       type, lower, upper, key);
 	}
 	rc_dests = nt->rc_dests;
 	spin_unlock_bh(&tn->nametbl_lock);
 
-	if (skb) {
+	if (skb)
 		tipc_node_broadcast(net, skb, rc_dests);
-		return 1;
-	}
-	return 0;
 }
 
 /**
@@ -820,25 +824,25 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
  */
 bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
-	struct name_table *nt = tipc_name_table(sub->net);
 	struct tipc_net *tn = tipc_net(sub->net);
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 type = tipc_sub_read(s, seq.type);
+	u32 type = sub->s.seq.type;
 	struct tipc_service *sc;
+	struct tipc_uaddr ua;
 	bool res = true;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
+		   sub->s.seq.lower, sub->s.seq.upper);
 	spin_lock_bh(&tn->nametbl_lock);
-	sc = tipc_service_find(sub->net, type);
+	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
-		sc = tipc_service_create(type, &nt->services[hash(type)]);
+		sc = tipc_service_create(sub->net, &ua);
 	if (sc) {
 		spin_lock_bh(&sc->lock);
 		tipc_service_subscribe(sc, sub);
 		spin_unlock_bh(&sc->lock);
 	} else {
-		pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
-			tipc_sub_read(s, seq.lower),
-			tipc_sub_read(s, seq.upper));
+		pr_warn("Failed to subscribe for {%u,%u,%u}\n",
+			type, sub->s.seq.lower, sub->s.seq.upper);
 		res = false;
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
@@ -852,12 +856,13 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
 	struct tipc_net *tn = tipc_net(sub->net);
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 type = tipc_sub_read(s, seq.type);
 	struct tipc_service *sc;
+	struct tipc_uaddr ua;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+		   sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper);
 	spin_lock_bh(&tn->nametbl_lock);
-	sc = tipc_service_find(sub->net, type);
+	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
 		goto exit;
 
@@ -909,7 +914,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
 	spin_lock_bh(&sc->lock);
 	rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
 		list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
-			tipc_service_remove_publ(sr, p->node, p->key);
+			tipc_service_remove_publ(sr, &p->sk, p->key);
 			kfree_rcu(p, rcu);
 		}
 		rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
@@ -993,9 +998,9 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref))
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
 			goto publ_msg_full;
@@ -1046,6 +1051,7 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_service *service = NULL;
 	struct hlist_head *head;
+	struct tipc_uaddr ua;
 	int err;
 	int i;
 
@@ -1059,7 +1065,9 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
 
 		if (*last_type ||
 		    (!i && *last_key && (*last_lower == *last_key))) {
-			service = tipc_service_find(net, *last_type);
+			tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+				   *last_type, *last_lower, *last_lower);
+			service = tipc_service_find(net, &ua);
 			if (!service)
 				return -EPIPE;
 		} else {
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 5a82a01369d6..c7c9a3ddd420 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,6 +42,7 @@ struct tipc_subscription;
 struct tipc_plist;
 struct tipc_nlist;
 struct tipc_group;
+struct tipc_uaddr;
 
 /*
  * TIPC name types reserved for internal TIPC use (both current and planned)
@@ -50,13 +52,10 @@ struct tipc_group;
 #define TIPC_NAMETBL_SIZE	1024	/* must be a power of 2 */
 
 /**
- * struct publication - info about a published (name or) name sequence
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
+ * struct publication - info about a published service address or range
+ * @sr: service range represented by this publication
+ * @sk: address of socket bound to this publication
  * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
- * @node: network address of publishing socket's node
- * @port: publishing port
  * @key: publication key, unique across the cluster
  * @id: publication id
  * @binding_node: all publications from the same node which bound this one
@@ -74,12 +73,9 @@ struct tipc_group;
  * @rcu: RCU callback head used for deferred freeing
  */
 struct publication {
-	u32 type;
-	u32 lower;
-	u32 upper;
-	u32 scope;
-	u32 node;
-	u32 port;
+	struct tipc_service_range sr;
+	struct tipc_socket_addr sk;
+	u16 scope;
 	u32 key;
 	u32 id;
 	struct list_head binding_node;
@@ -114,28 +110,29 @@ struct name_table {
 };
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			    u32 scope, bool exact, struct list_head *dports);
+bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
+				 struct tipc_socket_addr *sk);
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+				       bool exact, struct list_head *dports);
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+				     struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+			       struct list_head *dsts, int *dstcnt,
+			       u32 exclude, bool mcast);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
-			      u32 type, u32 domain);
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, struct tipc_nlist *nodes);
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
-			 struct list_head *dsts, int *dstcnt, u32 exclude,
-			 bool all);
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port,
-					 u32 key);
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
-			  u32 key);
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper, u32 scope,
-					     u32 node, u32 ref, u32 key);
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 node, u32 key);
+			      struct tipc_uaddr *ua);
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+					 struct tipc_socket_addr *sk, u32 key);
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+			   struct tipc_socket_addr *sk, u32 key);
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key);
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key);
 bool tipc_nametbl_subscribe(struct tipc_subscription *s);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a129f661bee3..3f927949bb23 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -125,6 +125,11 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 static void tipc_net_finalize(struct net *net, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
+	struct tipc_socket_addr sk = {0, addr};
+	struct tipc_uaddr ua;
+
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+		   TIPC_NODE_STATE, addr, addr);
 
 	if (cmpxchg(&tn->node_addr, 0, addr))
 		return;
@@ -132,8 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
 	tipc_mon_reinit_self(net);
-	tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr,
-			     TIPC_CLUSTER_SCOPE, 0, addr);
+	tipc_nametbl_publish(net, &ua, &sk, addr);
 }
 
 void tipc_net_finalize_work(struct work_struct *work)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 5a1ce64039f7..0749df80454d 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -696,7 +696,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 	if (err)
 		return err;
 
-	link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
+	link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST]));
 	link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
 	nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME],
 		    TIPC_MAX_LINK_NAME);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 136338b85504..61c38eaaa298 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -372,42 +372,49 @@ static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
 }
 
 static void tipc_node_read_lock(struct tipc_node *n)
+	__acquires(n->lock)
 {
 	read_lock_bh(&n->lock);
 }
 
 static void tipc_node_read_unlock(struct tipc_node *n)
+	__releases(n->lock)
 {
 	read_unlock_bh(&n->lock);
 }
 
 static void tipc_node_write_lock(struct tipc_node *n)
+	__acquires(n->lock)
 {
 	write_lock_bh(&n->lock);
 }
 
 static void tipc_node_write_unlock_fast(struct tipc_node *n)
+	__releases(n->lock)
 {
 	write_unlock_bh(&n->lock);
 }
 
 static void tipc_node_write_unlock(struct tipc_node *n)
+	__releases(n->lock)
 {
+	struct tipc_socket_addr sk;
 	struct net *net = n->net;
-	u32 addr = 0;
 	u32 flags = n->action_flags;
-	u32 link_id = 0;
-	u32 bearer_id;
 	struct list_head *publ_list;
+	struct tipc_uaddr ua;
+	u32 bearer_id;
 
 	if (likely(!flags)) {
 		write_unlock_bh(&n->lock);
 		return;
 	}
 
-	addr = n->addr;
-	link_id = n->link_id;
-	bearer_id = link_id & 0xffff;
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+		   TIPC_LINK_STATE, n->addr, n->addr);
+	sk.ref = n->link_id;
+	sk.node = n->addr;
+	bearer_id = n->link_id & 0xffff;
 	publ_list = &n->publ_list;
 
 	n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -416,20 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	write_unlock_bh(&n->lock);
 
 	if (flags & TIPC_NOTIFY_NODE_DOWN)
-		tipc_publ_notify(net, publ_list, addr, n->capabilities);
+		tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_NODE_UP)
-		tipc_named_node_up(net, addr, n->capabilities);
+		tipc_named_node_up(net, n->addr, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_LINK_UP) {
-		tipc_mon_peer_up(net, addr, bearer_id);
-		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
-				     TIPC_NODE_SCOPE, link_id, link_id);
+		tipc_mon_peer_up(net, n->addr, bearer_id);
+		tipc_nametbl_publish(net, &ua, &sk, n->link_id);
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
-		tipc_mon_peer_down(net, addr, bearer_id);
-		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
-				      addr, link_id);
+		tipc_mon_peer_down(net, n->addr, bearer_id);
+		tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
 	}
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cebcc104dc70..117a472a8e61 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -111,7 +111,6 @@ struct tipc_sock {
 	struct sock sk;
 	u32 conn_type;
 	u32 conn_instance;
-	int published;
 	u32 max_pkt;
 	u32 maxnagle;
 	u32 portid;
@@ -141,6 +140,7 @@ struct tipc_sock {
 	bool expect_ack;
 	bool nodelay;
 	bool group_is_open;
+	bool published;
 };
 
 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -151,10 +151,8 @@ static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		       bool kern);
 static void tipc_sk_timeout(struct timer_list *t);
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
-			   struct tipc_service_range const *seq);
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
-			    struct tipc_service_range const *seq);
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
 static int tipc_sk_leave(struct tipc_sock *tsk);
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
 static int tipc_sk_insert(struct tipc_sock *tsk);
@@ -644,7 +642,7 @@ static int tipc_release(struct socket *sock)
 	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	tipc_sk_leave(tsk);
-	tipc_sk_withdraw(tsk, 0, NULL);
+	tipc_sk_withdraw(tsk, NULL);
 	__skb_queue_purge(&tsk->mc_method.deferredq);
 	sk_stop_timer(sk, &sk->sk_timer);
 	tipc_sk_remove(tsk);
@@ -677,22 +675,31 @@ static int tipc_release(struct socket *sock)
  */
 static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 {
-	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
 	struct tipc_sock *tsk = tipc_sk(sock->sk);
+	bool unbind = false;
 
 	if (unlikely(!alen))
-		return tipc_sk_withdraw(tsk, 0, NULL);
+		return tipc_sk_withdraw(tsk, NULL);
 
-	if (addr->addrtype == TIPC_SERVICE_ADDR)
-		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
+	if (ua->addrtype == TIPC_SERVICE_ADDR) {
+		ua->addrtype = TIPC_SERVICE_RANGE;
+		ua->sr.upper = ua->sr.lower;
+	}
+	if (ua->scope < 0) {
+		unbind = true;
+		ua->scope = -ua->scope;
+	}
+	/* Users may still use deprecated TIPC_ZONE_SCOPE */
+	if (ua->scope != TIPC_NODE_SCOPE)
+		ua->scope = TIPC_CLUSTER_SCOPE;
 
 	if (tsk->group)
 		return -EACCES;
 
-	if (addr->scope >= 0)
-		return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq);
-	else
-		return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
+	if (unbind)
+		return tipc_sk_withdraw(tsk, ua);
+	return tipc_sk_publish(tsk, ua);
 }
 
 int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
@@ -707,18 +714,17 @@ int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 
 static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 {
-	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+	u32 atype = ua->addrtype;
 
 	if (alen) {
-		if (alen < sizeof(struct sockaddr_tipc))
+		if (!tipc_uaddr_valid(ua, alen))
 			return -EINVAL;
-		if (addr->family != AF_TIPC)
+		if (atype == TIPC_SOCKET_ADDR)
 			return -EAFNOSUPPORT;
-		if (addr->addrtype > TIPC_SERVICE_ADDR)
-			return -EAFNOSUPPORT;
-		if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) {
+		if (ua->sr.type < TIPC_RESERVED_TYPES) {
 			pr_warn_once("Can't bind to reserved service type %u\n",
-				     addr->addr.nameseq.type);
+				     ua->sr.type);
 			return -EACCES;
 		}
 	}
@@ -826,7 +832,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
 /**
  * tipc_sendmcast - send multicast message
  * @sock: socket structure
- * @seq: destination address
+ * @ua: destination address struct
  * @msg: message to send
  * @dlen: length of data to send
  * @timeout: timeout to wait for wakeup
@@ -834,7 +840,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
  * Called from function tipc_sendmsg(), which has done all sanity checks
  * Return: the number of bytes sent on success, or errno
  */
-static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
+static int tipc_sendmcast(struct  socket *sock, struct tipc_uaddr *ua,
 			  struct msghdr *msg, size_t dlen, long timeout)
 {
 	struct sock *sk = sock->sk;
@@ -842,7 +848,6 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
 	int mtu = tipc_bcast_get_mtu(net);
-	struct tipc_mc_method *method = &tsk->mc_method;
 	struct sk_buff_head pkts;
 	struct tipc_nlist dsts;
 	int rc;
@@ -857,8 +862,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 
 	/* Lookup destination nodes */
 	tipc_nlist_init(&dsts, tipc_own_addr(net));
-	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
-				      seq->upper, &dsts);
+	tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
 	if (!dsts.local && !dsts.remote)
 		return -EHOSTUNREACH;
 
@@ -868,9 +872,9 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
 	msg_set_destport(hdr, 0);
 	msg_set_destnode(hdr, 0);
-	msg_set_nametype(hdr, seq->type);
-	msg_set_namelower(hdr, seq->lower);
-	msg_set_nameupper(hdr, seq->upper);
+	msg_set_nametype(hdr, ua->sr.type);
+	msg_set_namelower(hdr, ua->sr.lower);
+	msg_set_nameupper(hdr, ua->sr.upper);
 
 	/* Build message as chain of buffers */
 	__skb_queue_head_init(&pkts);
@@ -880,7 +884,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	if (unlikely(rc == dlen)) {
 		trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
 					TIPC_DUMP_SK_SNDQ, " ");
-		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
+		rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
 				     &tsk->cong_link_cnt);
 	}
 
@@ -954,7 +958,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 				   int dlen, long timeout)
 {
 	struct sock *sk = sock->sk;
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
@@ -962,8 +966,8 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 	u32 node, port;
 	int rc;
 
-	node = dest->addr.id.node;
-	port = dest->addr.id.ref;
+	node = ua->sk.node;
+	port = ua->sk.ref;
 	if (!port && !node)
 		return -EHOSTUNREACH;
 
@@ -997,7 +1001,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 				   int dlen, long timeout)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct list_head *cong_links = &tsk->cong_links;
@@ -1008,16 +1012,13 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 	struct net *net = sock_net(sk);
 	u32 node, port, exclude;
 	struct list_head dsts;
-	u32 type, inst, scope;
 	int lookups = 0;
 	int dstcnt, rc;
 	bool cong;
 
 	INIT_LIST_HEAD(&dsts);
-
-	type = msg_nametype(hdr);
-	inst = dest->addr.name.name.instance;
-	scope = msg_lookup_scope(hdr);
+	ua->sa.type = msg_nametype(hdr);
+	ua->scope = msg_lookup_scope(hdr);
 
 	while (++lookups < 4) {
 		exclude = tipc_group_exclude(tsk->group);
@@ -1026,8 +1027,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 
 		/* Look for a non-congested destination member, if any */
 		while (1) {
-			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
-						 &dstcnt, exclude, false))
+			if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
+						       exclude, false))
 				return -EHOSTUNREACH;
 			tipc_dest_pop(&dsts, &node, &port);
 			cong = tipc_group_cong(tsk->group, node, port, blks,
@@ -1082,7 +1083,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 				 int dlen, long timeout)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
@@ -1107,9 +1108,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 		return -EHOSTUNREACH;
 
 	/* Complete message header */
-	if (dest) {
+	if (ua) {
 		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
-		msg_set_nameinst(hdr, dest->addr.name.name.instance);
+		msg_set_nameinst(hdr, ua->sa.instance);
 	} else {
 		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
 		msg_set_nameinst(hdr, 0);
@@ -1156,29 +1157,25 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
 				 int dlen, long timeout)
 {
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
-	u32 type, inst, scope, exclude;
 	struct list_head dsts;
-	u32 dstcnt;
+	u32 dstcnt, exclude;
 
 	INIT_LIST_HEAD(&dsts);
-
-	type = msg_nametype(hdr);
-	inst = dest->addr.name.name.instance;
-	scope = msg_lookup_scope(hdr);
+	ua->sa.type = msg_nametype(hdr);
+	ua->scope = msg_lookup_scope(hdr);
 	exclude = tipc_group_exclude(grp);
 
-	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
-				 &dstcnt, exclude, true))
+	if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true))
 		return -EHOSTUNREACH;
 
 	if (dstcnt == 1) {
-		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+		tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref);
 		return tipc_send_group_unicast(sock, m, dlen, timeout);
 	}
 
@@ -1198,17 +1195,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		       struct sk_buff_head *inputq)
 {
 	u32 self = tipc_own_addr(net);
-	u32 type, lower, upper, scope;
 	struct sk_buff *skb, *_skb;
 	u32 portid, onode;
 	struct sk_buff_head tmpq;
 	struct list_head dports;
 	struct tipc_msg *hdr;
+	struct tipc_uaddr ua;
 	int user, mtyp, hlen;
 	bool exact;
 
 	__skb_queue_head_init(&tmpq);
 	INIT_LIST_HEAD(&dports);
+	ua.addrtype = TIPC_SERVICE_RANGE;
 
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
@@ -1217,7 +1215,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		mtyp = msg_type(hdr);
 		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
 		onode = msg_orignode(hdr);
-		type = msg_nametype(hdr);
+		ua.sr.type = msg_nametype(hdr);
 
 		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
 			spin_lock_bh(&inputq->lock);
@@ -1232,24 +1230,23 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 
 		/* Group messages require exact scope match */
 		if (msg_in_group(hdr)) {
-			lower = 0;
-			upper = ~0;
-			scope = msg_lookup_scope(hdr);
+			ua.sr.lower = 0;
+			ua.sr.upper = ~0;
+			ua.scope = msg_lookup_scope(hdr);
 			exact = true;
 		} else {
 			/* TIPC_NODE_SCOPE means "any scope" in this context */
 			if (onode == self)
-				scope = TIPC_NODE_SCOPE;
+				ua.scope = TIPC_NODE_SCOPE;
 			else
-				scope = TIPC_CLUSTER_SCOPE;
+				ua.scope = TIPC_CLUSTER_SCOPE;
 			exact = false;
-			lower = msg_namelower(hdr);
-			upper = msg_nameupper(hdr);
+			ua.sr.lower = msg_namelower(hdr);
+			ua.sr.upper = msg_nameupper(hdr);
 		}
 
 		/* Create destination port list: */
-		tipc_nametbl_mc_lookup(net, type, lower, upper,
-				       scope, exact, &dports);
+		tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
 
 		/* Clone message per destination */
 		while (tipc_dest_pop(&dports, NULL, &portid)) {
@@ -1417,44 +1414,43 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	struct list_head *clinks = &tsk->cong_links;
 	bool syn = !tipc_sk_type_connectionless(sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
-	struct tipc_service_range *seq;
+	struct tipc_socket_addr skaddr;
 	struct sk_buff_head pkts;
-	u32 dport = 0, dnode = 0;
-	u32 type = 0, inst = 0;
-	int mtu, rc;
+	int atype, mtu, rc;
 
 	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
 		return -EMSGSIZE;
 
-	if (likely(dest)) {
-		if (unlikely(m->msg_namelen < sizeof(*dest)))
-			return -EINVAL;
-		if (unlikely(dest->family != AF_TIPC))
+	if (ua) {
+		if (!tipc_uaddr_valid(ua, m->msg_namelen))
 			return -EINVAL;
+		 atype = ua->addrtype;
 	}
 
+	/* If socket belongs to a communication group follow other paths */
 	if (grp) {
-		if (!dest)
+		if (!ua)
 			return tipc_send_group_bcast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_SERVICE_ADDR)
+		if (atype == TIPC_SERVICE_ADDR)
 			return tipc_send_group_anycast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_SOCKET_ADDR)
+		if (atype == TIPC_SOCKET_ADDR)
 			return tipc_send_group_unicast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_ADDR_MCAST)
+		if (atype == TIPC_SERVICE_RANGE)
 			return tipc_send_group_mcast(sock, m, dlen, timeout);
 		return -EINVAL;
 	}
 
-	if (unlikely(!dest)) {
-		dest = &tsk->peer;
-		if (!syn && dest->family != AF_TIPC)
+	if (!ua) {
+		ua = (struct tipc_uaddr *)&tsk->peer;
+		if (!syn && ua->family != AF_TIPC)
 			return -EDESTADDRREQ;
+		 atype = ua->addrtype;
 	}
 
 	if (unlikely(syn)) {
@@ -1464,54 +1460,51 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 			return -EISCONN;
 		if (tsk->published)
 			return -EOPNOTSUPP;
-		if (dest->addrtype == TIPC_SERVICE_ADDR) {
-			tsk->conn_type = dest->addr.name.name.type;
-			tsk->conn_instance = dest->addr.name.name.instance;
+		if (atype == TIPC_SERVICE_ADDR) {
+			tsk->conn_type = ua->sa.type;
+			tsk->conn_instance = ua->sa.instance;
 		}
 		msg_set_syn(hdr, 1);
 	}
 
-	seq = &dest->addr.nameseq;
-	if (dest->addrtype == TIPC_ADDR_MCAST)
-		return tipc_sendmcast(sock, seq, m, dlen, timeout);
-
-	if (dest->addrtype == TIPC_SERVICE_ADDR) {
-		type = dest->addr.name.name.type;
-		inst = dest->addr.name.name.instance;
-		dnode = dest->addr.name.domain;
-		dport = tipc_nametbl_translate(net, type, inst, &dnode);
-		if (unlikely(!dport && !dnode))
+	/* Determine destination */
+	if (atype == TIPC_SERVICE_RANGE) {
+		return tipc_sendmcast(sock, ua, m, dlen, timeout);
+	} else if (atype == TIPC_SERVICE_ADDR) {
+		skaddr.node = ua->lookup_node;
+		ua->scope = tipc_node2scope(skaddr.node);
+		if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr))
 			return -EHOSTUNREACH;
-	} else if (dest->addrtype == TIPC_SOCKET_ADDR) {
-		dnode = dest->addr.id.node;
+	} else if (atype == TIPC_SOCKET_ADDR) {
+		skaddr = ua->sk;
 	} else {
 		return -EINVAL;
 	}
 
 	/* Block or return if destination link is congested */
 	rc = tipc_wait_for_cond(sock, &timeout,
-				!tipc_dest_find(clinks, dnode, 0));
+				!tipc_dest_find(clinks, skaddr.node, 0));
 	if (unlikely(rc))
 		return rc;
 
-	if (dest->addrtype == TIPC_SERVICE_ADDR) {
+	/* Finally build message header */
+	msg_set_destnode(hdr, skaddr.node);
+	msg_set_destport(hdr, skaddr.ref);
+	if (atype == TIPC_SERVICE_ADDR) {
 		msg_set_type(hdr, TIPC_NAMED_MSG);
 		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
-		msg_set_nametype(hdr, type);
-		msg_set_nameinst(hdr, inst);
-		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
-		msg_set_destnode(hdr, dnode);
-		msg_set_destport(hdr, dport);
+		msg_set_nametype(hdr, ua->sa.type);
+		msg_set_nameinst(hdr, ua->sa.instance);
+		msg_set_lookup_scope(hdr, ua->scope);
 	} else { /* TIPC_SOCKET_ADDR */
 		msg_set_type(hdr, TIPC_DIRECT_MSG);
 		msg_set_lookup_scope(hdr, 0);
-		msg_set_destnode(hdr, dnode);
-		msg_set_destport(hdr, dest->addr.id.ref);
 		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
+	/* Add message body */
 	__skb_queue_head_init(&pkts);
-	mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
+	mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
 		return rc;
@@ -1520,10 +1513,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		return -ENOMEM;
 	}
 
+	/* Send message */
 	trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
-	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+	rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid);
 	if (unlikely(rc == -ELINKCONG)) {
-		tipc_dest_push(clinks, dnode, 0);
+		tipc_dest_push(clinks, skaddr.node, 0);
 		tsk->cong_link_cnt++;
 		rc = 0;
 	}
@@ -2891,66 +2885,62 @@ static void tipc_sk_timeout(struct timer_list *t)
 	sock_put(sk);
 }
 
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
-			   struct tipc_service_range const *seq)
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua)
 {
 	struct sock *sk = &tsk->sk;
 	struct net *net = sock_net(sk);
-	struct publication *publ;
+	struct tipc_socket_addr skaddr;
+	struct publication *p;
 	u32 key;
 
-	if (scope != TIPC_NODE_SCOPE)
-		scope = TIPC_CLUSTER_SCOPE;
-
 	if (tipc_sk_connected(sk))
 		return -EINVAL;
 	key = tsk->portid + tsk->pub_count + 1;
 	if (key == tsk->portid)
 		return -EADDRINUSE;
-
-	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
-				    scope, tsk->portid, key);
-	if (unlikely(!publ))
+	skaddr.ref = tsk->portid;
+	skaddr.node = tipc_own_addr(net);
+	p = tipc_nametbl_publish(net, ua, &skaddr, key);
+	if (unlikely(!p))
 		return -EINVAL;
 
-	list_add(&publ->binding_sock, &tsk->publications);
+	list_add(&p->binding_sock, &tsk->publications);
 	tsk->pub_count++;
-	tsk->published = 1;
+	tsk->published = true;
 	return 0;
 }
 
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
-			    struct tipc_service_range const *seq)
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua)
 {
 	struct net *net = sock_net(&tsk->sk);
-	struct publication *publ;
-	struct publication *safe;
+	struct publication *safe, *p;
+	struct tipc_uaddr _ua;
 	int rc = -EINVAL;
 
-	if (scope != TIPC_NODE_SCOPE)
-		scope = TIPC_CLUSTER_SCOPE;
-
-	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
-		if (seq) {
-			if (publ->scope != scope)
-				continue;
-			if (publ->type != seq->type)
-				continue;
-			if (publ->lower != seq->lower)
-				continue;
-			if (publ->upper != seq->upper)
-				break;
-			tipc_nametbl_withdraw(net, publ->type, publ->lower,
-					      publ->upper, publ->key);
-			rc = 0;
-			break;
+	list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) {
+		if (!ua) {
+			tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope,
+				   p->sr.type, p->sr.lower, p->sr.upper);
+			tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key);
+			continue;
 		}
-		tipc_nametbl_withdraw(net, publ->type, publ->lower,
-				      publ->upper, publ->key);
+		/* Unbind specific publication */
+		if (p->scope != ua->scope)
+			continue;
+		if (p->sr.type != ua->sr.type)
+			continue;
+		if (p->sr.lower != ua->sr.lower)
+			continue;
+		if (p->sr.upper != ua->sr.upper)
+			break;
+		tipc_nametbl_withdraw(net, ua, &p->sk, p->key);
 		rc = 0;
+		break;
 	}
-	if (list_empty(&tsk->publications))
+	if (list_empty(&tsk->publications)) {
 		tsk->published = 0;
+		rc = 0;
+	}
 	return rc;
 }
 
@@ -3067,13 +3057,15 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	struct net *net = sock_net(&tsk->sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
-	struct tipc_service_range seq;
+	struct tipc_uaddr ua;
 	int rc;
 
 	if (mreq->type < TIPC_RESERVED_TYPES)
 		return -EACCES;
 	if (mreq->scope > TIPC_NODE_SCOPE)
 		return -EINVAL;
+	if (mreq->scope != TIPC_NODE_SCOPE)
+		mreq->scope = TIPC_CLUSTER_SCOPE;
 	if (grp)
 		return -EACCES;
 	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
@@ -3083,11 +3075,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	msg_set_lookup_scope(hdr, mreq->scope);
 	msg_set_nametype(hdr, mreq->type);
 	msg_set_dest_droppable(hdr, true);
-	seq.type = mreq->type;
-	seq.lower = mreq->instance;
-	seq.upper = seq.lower;
-	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
-	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
+		   mreq->type, mreq->instance, mreq->instance);
+	tipc_nametbl_build_group(net, grp, &ua);
+	rc = tipc_sk_publish(tsk, &ua);
 	if (rc) {
 		tipc_group_delete(net, grp);
 		tsk->group = NULL;
@@ -3104,15 +3095,17 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
 {
 	struct net *net = sock_net(&tsk->sk);
 	struct tipc_group *grp = tsk->group;
-	struct tipc_service_range seq;
+	struct tipc_uaddr ua;
 	int scope;
 
 	if (!grp)
 		return -EINVAL;
-	tipc_group_self(grp, &seq, &scope);
+	ua.addrtype = TIPC_SERVICE_RANGE;
+	tipc_group_self(grp, &ua.sr, &scope);
+	ua.scope = scope;
 	tipc_group_delete(net, grp);
 	tsk->group = NULL;
-	tipc_sk_withdraw(tsk, scope, &seq);
+	tipc_sk_withdraw(tsk, &ua);
 	return 0;
 }
 
@@ -3711,11 +3704,11 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
 
 	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper))
 		goto attr_msg_cancel;
 
 	nla_nest_end(skb, attrs);
@@ -3863,9 +3856,9 @@ bool tipc_sk_filtering(struct sock *sk)
 		p = list_first_entry_or_null(&tsk->publications,
 					     struct publication, binding_sock);
 		if (p) {
-			type = p->type;
-			lower = p->lower;
-			upper = p->upper;
+			type = p->sr.type;
+			lower = p->sr.lower;
+			upper = p->sr.upper;
 		}
 	}
 
@@ -3964,9 +3957,9 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
 	if (tsk->published) {
 		p = list_first_entry_or_null(&tsk->publications,
 					     struct publication, binding_sock);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0);
 	}
 	i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
 	i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index f6ad0005218c..8e00d739f03a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,77 +40,75 @@
 #include "subscr.h"
 
 static void tipc_sub_send_event(struct tipc_subscription *sub,
-				u32 found_lower, u32 found_upper,
-				u32 event, u32 port, u32 node)
+				struct publication *p,
+				u32 event)
 {
+	struct tipc_subscr *s = &sub->evt.s;
 	struct tipc_event *evt = &sub->evt;
 
 	if (sub->inactive)
 		return;
 	tipc_evt_write(evt, event, event);
-	tipc_evt_write(evt, found_lower, found_lower);
-	tipc_evt_write(evt, found_upper, found_upper);
-	tipc_evt_write(evt, port.ref, port);
-	tipc_evt_write(evt, port.node, node);
+	if (p) {
+		tipc_evt_write(evt, found_lower, p->sr.lower);
+		tipc_evt_write(evt, found_upper, p->sr.upper);
+		tipc_evt_write(evt, port.ref, p->sk.ref);
+		tipc_evt_write(evt, port.node, p->sk.node);
+	} else {
+		tipc_evt_write(evt, found_lower, s->seq.lower);
+		tipc_evt_write(evt, found_upper, s->seq.upper);
+		tipc_evt_write(evt, port.ref, 0);
+		tipc_evt_write(evt, port.node, 0);
+	}
 	tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
 }
 
 /**
  * tipc_sub_check_overlap - test for subscription overlap with the given values
- * @seq: tipc_name_seq to check
- * @found_lower: lower value to test
- * @found_upper: upper value to test
+ * @subscribed: the service range subscribed for
+ * @found: the service range we are checning for match
  *
- * Return: 1 if there is overlap, otherwise 0.
+ * Returns true if there is overlap, otherwise false.
  */
-int tipc_sub_check_overlap(struct tipc_service_range *seq, u32 found_lower,
-			   u32 found_upper)
+static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed,
+				   struct tipc_service_range *found)
 {
-	if (found_lower < seq->lower)
-		found_lower = seq->lower;
-	if (found_upper > seq->upper)
-		found_upper = seq->upper;
-	if (found_lower > found_upper)
-		return 0;
-	return 1;
+	u32 found_lower = found->lower;
+	u32 found_upper = found->upper;
+
+	if (found_lower < subscribed->lower)
+		found_lower = subscribed->lower;
+	if (found_upper > subscribed->upper)
+		found_upper = subscribed->upper;
+	return found_lower <= found_upper;
 }
 
 void tipc_sub_report_overlap(struct tipc_subscription *sub,
-			     u32 found_lower, u32 found_upper,
-			     u32 event, u32 port, u32 node,
-			     u32 scope, int must)
+			     struct publication *p,
+			     u32 event, bool must)
 {
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 filter = tipc_sub_read(s, filter);
-	struct tipc_service_range seq;
+	struct tipc_service_range *sr = &sub->s.seq;
+	u32 filter = sub->s.filter;
 
-	seq.type = tipc_sub_read(s, seq.type);
-	seq.lower = tipc_sub_read(s, seq.lower);
-	seq.upper = tipc_sub_read(s, seq.upper);
-
-	if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
+	if (!tipc_sub_check_overlap(sr, &p->sr))
 		return;
-
 	if (!must && !(filter & TIPC_SUB_PORTS))
 		return;
-	if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+	if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE)
 		return;
-	if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
+	if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE)
 		return;
 	spin_lock(&sub->lock);
-	tipc_sub_send_event(sub, found_lower, found_upper,
-			    event, port, node);
+	tipc_sub_send_event(sub, p, event);
 	spin_unlock(&sub->lock);
 }
 
 static void tipc_sub_timeout(struct timer_list *t)
 {
 	struct tipc_subscription *sub = from_timer(sub, t, timer);
-	struct tipc_subscr *s = &sub->evt.s;
 
 	spin_lock(&sub->lock);
-	tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
-			    TIPC_SUBSCR_TIMEOUT, 0, 0);
+	tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT);
 	sub->inactive = true;
 	spin_unlock(&sub->lock);
 }
@@ -134,12 +132,14 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid)
 {
+	u32 lower = tipc_sub_read(s, seq.lower);
+	u32 upper = tipc_sub_read(s, seq.upper);
 	u32 filter = tipc_sub_read(s, filter);
 	struct tipc_subscription *sub;
 	u32 timeout;
 
 	if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
-	    (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+	    lower > upper) {
 		pr_warn("Subscription rejected, illegal request\n");
 		return NULL;
 	}
@@ -154,6 +154,12 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 	sub->conid = conid;
 	sub->inactive = false;
 	memcpy(&sub->evt.s, s, sizeof(*s));
+	sub->s.seq.type = tipc_sub_read(s, seq.type);
+	sub->s.seq.lower = lower;
+	sub->s.seq.upper = upper;
+	sub->s.filter = filter;
+	sub->s.timeout = tipc_sub_read(s, timeout);
+	memcpy(sub->s.usr_handle, s->usr_handle, 8);
 	spin_lock_init(&sub->lock);
 	kref_init(&sub->kref);
 	if (!tipc_nametbl_subscribe(sub)) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 3ded27391d54..ddea6554ec46 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2003-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@
 #define TIPC_MAX_SUBSCR         65535
 #define TIPC_MAX_PUBL           65535
 
+struct publication;
 struct tipc_subscription;
 struct tipc_conn;
 
@@ -59,12 +60,13 @@ struct tipc_conn;
  * @lock: serialize up/down and timer events
  */
 struct tipc_subscription {
+	struct tipc_subscr s;
+	struct tipc_event evt;
 	struct kref kref;
 	struct net *net;
 	struct timer_list timer;
 	struct list_head service_list;
 	struct list_head sub_list;
-	struct tipc_event evt;
 	int conid;
 	bool inactive;
 	spinlock_t lock;
@@ -74,13 +76,9 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid);
 void tipc_sub_unsubscribe(struct tipc_subscription *sub);
-
-int tipc_sub_check_overlap(struct tipc_service_range *seq,
-			   u32 found_lower, u32 found_upper);
 void tipc_sub_report_overlap(struct tipc_subscription *sub,
-			     u32 found_lower, u32 found_upper,
-			     u32 event, u32 port, u32 node,
-			     u32 scope, int must);
+			     struct publication *p,
+			     u32 event, bool must);
 
 int __net_init tipc_topsrv_init_net(struct net *net);
 void __net_exit tipc_topsrv_exit_net(struct net *net);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index d9cd229aa111..790c6b7ecb26 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -601,7 +601,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 	if (!info ||
 	    before(seq, info->end_seq - info->len)) {
 		/* if retransmit_hint is irrelevant start
-		 * from the beggining of the list
+		 * from the beginning of the list
 		 */
 		info = list_first_entry_or_null(&context->records_list,
 						struct tls_record_info, list);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4faabd1ecfd1..a71ed664da0a 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -445,6 +445,97 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
+					      struct xdp_desc *desc)
+{
+	struct xsk_buff_pool *pool = xs->pool;
+	u32 hr, len, ts, offset, copy, copied;
+	struct sk_buff *skb;
+	struct page *page;
+	void *buffer;
+	int err, i;
+	u64 addr;
+
+	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+
+	skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+	if (unlikely(!skb))
+		return ERR_PTR(err);
+
+	skb_reserve(skb, hr);
+
+	addr = desc->addr;
+	len = desc->len;
+	ts = pool->unaligned ? len : pool->chunk_size;
+
+	buffer = xsk_buff_raw_get_data(pool, addr);
+	offset = offset_in_page(buffer);
+	addr = buffer - pool->addrs;
+
+	for (copied = 0, i = 0; copied < len; i++) {
+		page = pool->umem->pgs[addr >> PAGE_SHIFT];
+		get_page(page);
+
+		copy = min_t(u32, PAGE_SIZE - offset, len - copied);
+		skb_fill_page_desc(skb, i, page, offset, copy);
+
+		copied += copy;
+		addr += copy;
+		offset = 0;
+	}
+
+	skb->len += len;
+	skb->data_len += len;
+	skb->truesize += ts;
+
+	refcount_add(ts, &xs->sk.sk_wmem_alloc);
+
+	return skb;
+}
+
+static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
+				     struct xdp_desc *desc)
+{
+	struct net_device *dev = xs->dev;
+	struct sk_buff *skb;
+
+	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
+		skb = xsk_build_skb_zerocopy(xs, desc);
+		if (IS_ERR(skb))
+			return skb;
+	} else {
+		u32 hr, tr, len;
+		void *buffer;
+		int err;
+
+		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+		tr = dev->needed_tailroom;
+		len = desc->len;
+
+		skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+		if (unlikely(!skb))
+			return ERR_PTR(err);
+
+		skb_reserve(skb, hr);
+		skb_put(skb, len);
+
+		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
+		err = skb_store_bits(skb, 0, buffer, len);
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return ERR_PTR(err);
+		}
+	}
+
+	skb->dev = dev;
+	skb->priority = xs->sk.sk_priority;
+	skb->mark = xs->sk.sk_mark;
+	skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+	skb->destructor = xsk_destruct_skb;
+
+	return skb;
+}
+
 static int xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
@@ -461,43 +552,30 @@ static int xsk_generic_xmit(struct sock *sk)
 		goto out;
 
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-		char *buffer;
-		u64 addr;
-		u32 len;
-
 		if (max_batch-- == 0) {
 			err = -EAGAIN;
 			goto out;
 		}
 
-		len = desc.len;
-		skb = sock_alloc_send_skb(sk, len, 1, &err);
-		if (unlikely(!skb))
+		skb = xsk_build_skb(xs, &desc);
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
 			goto out;
+		}
 
-		skb_put(skb, len);
-		addr = desc.addr;
-		buffer = xsk_buff_raw_get_data(xs->pool, addr);
-		err = skb_store_bits(skb, 0, buffer, len);
 		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
 		spin_lock_irqsave(&xs->pool->cq_lock, flags);
-		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+		if (xskq_prod_reserve(xs->pool->cq)) {
 			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 			kfree_skb(skb);
 			goto out;
 		}
 		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
-		skb->dev = xs->dev;
-		skb->priority = sk->sk_priority;
-		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
-		skb->destructor = xsk_destruct_skb;
-
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 2823b7c3302d..2ac3802c2cd7 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -47,19 +47,18 @@ struct xsk_queue {
 	u64 queue_empty_descs;
 };
 
-/* The structure of the shared state of the rings are the same as the
- * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
- * ring, the kernel is the producer and user space is the consumer. For
- * the Tx and fill rings, the kernel is the consumer and user space is
- * the producer.
+/* The structure of the shared state of the rings are a simple
+ * circular buffer, as outlined in
+ * Documentation/core-api/circular-buffers.rst. For the Rx and
+ * completion ring, the kernel is the producer and user space is the
+ * consumer. For the Tx and fill rings, the kernel is the consumer and
+ * user space is the producer.
  *
  * producer                         consumer
  *
- * if (LOAD ->consumer) {           LOAD ->producer
- *                    (A)           smp_rmb()       (C)
+ * if (LOAD ->consumer) {  (A)      LOAD.acq ->producer  (C)
  *    STORE $data                   LOAD $data
- *    smp_wmb()       (B)           smp_mb()        (D)
- *    STORE ->producer              STORE ->consumer
+ *    STORE.rel ->producer (B)      STORE.rel ->consumer (D)
  * }
  *
  * (A) pairs with (D), and (B) pairs with (C).
@@ -78,7 +77,8 @@ struct xsk_queue {
  *
  * (A) is a control dependency that separates the load of ->consumer
  * from the stores of $data. In case ->consumer indicates there is no
- * room in the buffer to store $data we do not. So no barrier is needed.
+ * room in the buffer to store $data we do not. The dependency will
+ * order both of the stores after the loads. So no barrier is needed.
  *
  * (D) protects the load of the data to be observed to happen after the
  * store of the consumer pointer. If we did not have this memory
@@ -227,15 +227,13 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
 
 static inline void __xskq_cons_release(struct xsk_queue *q)
 {
-	smp_mb(); /* D, matches A */
-	WRITE_ONCE(q->ring->consumer, q->cached_cons);
+	smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matchees A */
 }
 
 static inline void __xskq_cons_peek(struct xsk_queue *q)
 {
 	/* Refresh the local pointer */
-	q->cached_prod = READ_ONCE(q->ring->producer);
-	smp_rmb(); /* C, matches B */
+	q->cached_prod = smp_load_acquire(&q->ring->producer);  /* C, matches B */
 }
 
 static inline void xskq_cons_get_entries(struct xsk_queue *q)
@@ -397,9 +395,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 
 static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
 {
-	smp_wmb(); /* B, matches C */
-
-	WRITE_ONCE(q->ring->producer, idx);
+	smp_store_release(&q->ring->producer, idx); /* B, matches C */
 }
 
 static inline void xskq_prod_submit(struct xsk_queue *q)
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 113fd9017203..67b4ce504852 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 
-	bpf_clear_redirect_map(map);
 	synchronize_net();
 	bpf_map_area_free(m);
 }
@@ -125,6 +124,16 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }
 
+static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	return READ_ONCE(m->xsk_map[key]);
+}
+
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -215,6 +224,11 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+}
+
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry)
 {
@@ -247,4 +261,5 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "xsk_map",
 	.map_btf_id = &xsk_map_btf_id,
+	.map_redirect = xsk_map_redirect,
 };
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index f9c53ca5cf93..f4adeed5f5f0 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -28,10 +28,28 @@ across the sample scripts.  Usage example is printed on errors::
   -b : ($BURST)     HW level bursting of SKBs
   -v : ($VERBOSE)   verbose
   -x : ($DEBUG)     debug
+  -6 : ($IP6)       IPv6
+  -w : ($DELAY)     Tx Delay value (ns)
+  -a : ($APPEND)    Script will not reset generator's state, but will append its config
 
 The global variable being set is also listed.  E.g. the required
 interface/device parameter "-i" sets variable $DEV.
 
+"-a" parameter may be used to create different flows simultaneously.
+In this mode script will keep the existing config, will append its settings.
+In this mode you'll have to manually run traffic with "pg_ctrl start".
+
+For example you may use:
+
+    source ./samples/pktgen/functions.sh
+    pg_ctrl reset
+    # add first device
+    ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f0 -m 34:80:0d:a3:fc:c9 -t 8
+    # add second device
+    ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f1 -m 34:80:0d:a3:fc:c9 -t 8
+    # run joint traffic on two devs
+    pg_ctrl start
+
 Common functions
 ----------------
 The functions.sh file provides; Three different shell functions for
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index dae06d5b38fa..a335393157eb 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -108,7 +108,12 @@ function pgset() {
     fi
 }
 
-[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT
+if [[ -z "$APPEND" ]]; then
+	if [[ $EUID -eq 0 ]]; then
+		# Cleanup pktgen setup on exit if thats not "append mode"
+		trap 'pg_ctrl "reset"' EXIT
+	fi
+fi
 
 ## -- General shell tricks --
 
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index ff0ed474fee9..b4c1b371e4b8 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -19,12 +19,14 @@ function usage() {
     echo "  -v : (\$VERBOSE)   verbose"
     echo "  -x : (\$DEBUG)     debug"
     echo "  -6 : (\$IP6)       IPv6"
+    echo "  -w : (\$DELAY)     Tx Delay value (ns)"
+    echo "  -a : (\$APPEND)    Script will not reset generator's state, but will append its config"
     echo ""
 }
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6a" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -66,6 +68,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
 	  export BURST=$OPTARG
 	  info "SKB bursting: BURST=$BURST"
           ;;
+        w)
+	  export DELAY=$OPTARG
+	  info "DELAY=$DELAY"
+          ;;
         v)
           export VERBOSE=yes
           info "Verbose mode: VERBOSE=$VERBOSE"
@@ -78,6 +84,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
 	  export IP6=6
 	  info "IP6: IP6=$IP6"
 	  ;;
+        a)
+          export APPEND=yes
+          info "Append mode: APPEND=$APPEND"
+          ;;
         h|?|*)
           usage;
           err 2 "[ERROR] Unknown parameters!!!"
@@ -100,6 +110,9 @@ if [ -z "$THREADS" ]; then
     export THREADS=1
 fi
 
+# default DELAY
+[ -z "$DELAY" ] && export DELAY=0 # Zero means max speed
+
 export L_THREAD=$(( THREADS + F_THREAD - 1 ))
 
 if [ -z "$DEV" ]; then
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 1b6204125d2d..30a610b541ad 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -50,9 +50,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index e607cb369b20..a6195bd77532 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -33,9 +33,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index a4e250b45dce..a09f3422fbcc 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -31,20 +31,17 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # Flow variation random source port between min and max
 UDP_SRC_MIN=9
 UDP_SRC_MAX=109
 
 # General cleanup everything since last run
 # (especially important if other threads were configured by other scripts)
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Add remove all other devices and add_device $DEV to thread 0
 thread=0
-pg_thread $thread "rem_device_all"
+[ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
 pg_thread $thread "add_device" $DEV
 
 # How many packets to send (zero means indefinitely)
@@ -80,11 +77,15 @@ pg_set $DEV "flag UDPSRC_RND"
 pg_set $DEV "udp_src_min $UDP_SRC_MIN"
 pg_set $DEV "udp_src_max $UDP_SRC_MAX"
 
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+if [ -z "$APPEND" ]; then
+    # start_run
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
 
-# Print results
-echo "Result device: $DEV"
-cat /proc/net/pktgen/$DEV
+    # Print results
+    echo "Result device: $DEV"
+    cat /proc/net/pktgen/$DEV
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
+\ No newline at end of file
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index cb2495fcdc60..acae8ede0d6c 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -17,7 +17,6 @@ source ${basedir}/parameters.sh
 [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
 
 # Base Config
-DELAY="0"        # Zero means max speed
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
 # Flow variation random source port between min and max
@@ -39,7 +38,7 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
@@ -48,7 +47,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Notice config queue to map to cpu (mirrors smp_processor_id())
@@ -82,14 +81,18 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+if [ -z "$APPEND" ]; then
+    # start_run
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
 
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+    # Print results
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index fff50765a5aa..5adcf954de73 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -42,18 +42,15 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -97,5 +94,9 @@ function control_c() {
 # trap keyboard interrupt (Ctrl-C)
 trap control_c SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 2cd6b701400d..ddce876635aa 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -34,9 +34,6 @@ fi
 [ -z "$FLOWS" ]     && FLOWS="8000"
 [ -z "$FLOWLEN" ]   && FLOWLEN="10"
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
 fi
@@ -45,14 +42,14 @@ fi
 read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15)
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -107,7 +104,11 @@ function print_result() {
 # trap keyboard interrupt (Ctrl-C)
 trap true SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
 
-print_result
+    print_result
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 4cb6252ade39..4a65fe2fcee9 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -31,18 +31,15 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -91,7 +88,11 @@ function print_result() {
 # trap keyboard interrupt (Ctrl-C)
 trap true SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
 
-print_result
+    print_result
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 728106060a02..10f1da571f40 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -15,7 +15,6 @@ root_check_run_with_sudo "$@"
 source ${basedir}/parameters.sh
 
 # Base Config
-DELAY="0"        # Zero means max speed
 [ -z "$COUNT" ]     && COUNT="20000000"   # Zero means indefinitely
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
@@ -45,7 +44,7 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((i = 0; i < $THREADS; i++)); do
@@ -59,7 +58,7 @@ for ((i = 0; i < $THREADS; i++)); do
     info "irq ${irq_array[$i]} is set affinity to `cat /proc/irq/${irq_array[$i]}/smp_affinity_list`"
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # select queue and bind the queue and $dev in 1:1 relationship
@@ -100,14 +99,18 @@ for ((i = 0; i < $THREADS; i++)); do
 done
 
 # start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
-
-# Print results
-for ((i = 0; i < $THREADS; i++)); do
-    thread=${cpu_array[$((i+F_THREAD))]}
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
+
+    # Print results
+    for ((i = 0; i < $THREADS; i++)); do
+        thread=${cpu_array[$((i+F_THREAD))]}
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_doc.py
index 867ada23281c..2d94025b38e9 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_doc.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # Copyright (C) 2018-2019 Netronome Systems, Inc.
+# Copyright (C) 2021 Isovalent, Inc.
 
 # In case user attempts to run with Python 2.
 from __future__ import print_function
@@ -13,6 +14,9 @@ import sys, os
 class NoHelperFound(BaseException):
     pass
 
+class NoSyscallCommandFound(BaseException):
+    pass
+
 class ParsingError(BaseException):
     def __init__(self, line='<line not provided>', reader=None):
         if reader:
@@ -22,18 +26,27 @@ class ParsingError(BaseException):
         else:
             BaseException.__init__(self, 'Error parsing line: %s' % line)
 
-class Helper(object):
+
+class APIElement(object):
     """
-    An object representing the description of an eBPF helper function.
-    @proto: function prototype of the helper function
-    @desc: textual description of the helper function
-    @ret: description of the return value of the helper function
+    An object representing the description of an aspect of the eBPF API.
+    @proto: prototype of the API symbol
+    @desc: textual description of the symbol
+    @ret: (optional) description of any associated return value
     """
     def __init__(self, proto='', desc='', ret=''):
         self.proto = proto
         self.desc = desc
         self.ret = ret
 
+
+class Helper(APIElement):
+    """
+    An object representing the description of an eBPF helper function.
+    @proto: function prototype of the helper function
+    @desc: textual description of the helper function
+    @ret: description of the return value of the helper function
+    """
     def proto_break_down(self):
         """
         Break down helper function protocol into smaller chunks: return type,
@@ -60,6 +73,7 @@ class Helper(object):
 
         return res
 
+
 class HeaderParser(object):
     """
     An object used to parse a file in order to extract the documentation of a
@@ -72,6 +86,13 @@ class HeaderParser(object):
         self.reader = open(filename, 'r')
         self.line = ''
         self.helpers = []
+        self.commands = []
+
+    def parse_element(self):
+        proto    = self.parse_symbol()
+        desc     = self.parse_desc()
+        ret      = self.parse_ret()
+        return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
@@ -79,6 +100,18 @@ class HeaderParser(object):
         ret      = self.parse_ret()
         return Helper(proto=proto, desc=desc, ret=ret)
 
+    def parse_symbol(self):
+        p = re.compile(' \* ?(.+)$')
+        capture = p.match(self.line)
+        if not capture:
+            raise NoSyscallCommandFound
+        end_re = re.compile(' \* ?NOTES$')
+        end = end_re.match(self.line)
+        if end:
+            raise NoSyscallCommandFound
+        self.line = self.reader.readline()
+        return capture.group(1)
+
     def parse_proto(self):
         # Argument can be of shape:
         #   - "void"
@@ -140,16 +173,29 @@ class HeaderParser(object):
                     break
         return ret
 
-    def run(self):
-        # Advance to start of helper function descriptions.
-        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
+    def seek_to(self, target, help_message):
+        self.reader.seek(0)
+        offset = self.reader.read().find(target)
         if offset == -1:
-            raise Exception('Could not find start of eBPF helper descriptions list')
+            raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
         self.reader.readline()
         self.line = self.reader.readline()
 
+    def parse_syscall(self):
+        self.seek_to('* DOC: eBPF Syscall Commands',
+                     'Could not find start of eBPF syscall descriptions list')
+        while True:
+            try:
+                command = self.parse_element()
+                self.commands.append(command)
+            except NoSyscallCommandFound:
+                break
+
+    def parse_helpers(self):
+        self.seek_to('* Start of BPF helper function descriptions:',
+                     'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
@@ -157,6 +203,9 @@ class HeaderParser(object):
             except NoHelperFound:
                 break
 
+    def run(self):
+        self.parse_syscall()
+        self.parse_helpers()
         self.reader.close()
 
 ###############################################################################
@@ -165,10 +214,11 @@ class Printer(object):
     """
     A generic class for printers. Printers should be created with an array of
     Helper objects, and implement a way to print them in the desired fashion.
-    @helpers: array of Helper objects to print to standard output
+    @parser: A HeaderParser with objects to print to standard output
     """
-    def __init__(self, helpers):
-        self.helpers = helpers
+    def __init__(self, parser):
+        self.parser = parser
+        self.elements = []
 
     def print_header(self):
         pass
@@ -181,19 +231,23 @@ class Printer(object):
 
     def print_all(self):
         self.print_header()
-        for helper in self.helpers:
-            self.print_one(helper)
+        for elem in self.elements:
+            self.print_one(elem)
         self.print_footer()
 
+
 class PrinterRST(Printer):
     """
-    A printer for dumping collected information about helpers as a ReStructured
-    Text page compatible with the rst2man program, which can be used to
-    generate a manual page for the helpers.
-    @helpers: array of Helper objects to print to standard output
+    A generic class for printers that print ReStructured Text. Printers should
+    be created with a HeaderParser object, and implement a way to print API
+    elements in the desired fashion.
+    @parser: A HeaderParser with objects to print to standard output
     """
-    def print_header(self):
-        header = '''\
+    def __init__(self, parser):
+        self.parser = parser
+
+    def print_license(self):
+        license = '''\
 .. Copyright (C) All BPF authors and contributors from 2014 to present.
 .. See git log include/uapi/linux/bpf.h in kernel tree for details.
 .. 
@@ -221,9 +275,39 @@ class PrinterRST(Printer):
 .. 
 .. Please do not edit this file. It was generated from the documentation
 .. located in file include/uapi/linux/bpf.h of the Linux kernel sources
-.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
+.. (helpers description), and from scripts/bpf_doc.py in the same
 .. repository (header and footer).
+'''
+        print(license)
 
+    def print_elem(self, elem):
+        if (elem.desc):
+            print('\tDescription')
+            # Do not strip all newline characters: formatted code at the end of
+            # a section must be followed by a blank line.
+            for line in re.sub('\n$', '', elem.desc, count=1).split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        if (elem.ret):
+            print('\tReturn')
+            for line in elem.ret.rstrip().split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        print('')
+
+
+class PrinterHelpersRST(PrinterRST):
+    """
+    A printer for dumping collected information about helpers as a ReStructured
+    Text page compatible with the rst2man program, which can be used to
+    generate a manual page for the helpers.
+    @parser: A HeaderParser with Helper objects to print to standard output
+    """
+    def __init__(self, parser):
+        self.elements = parser.helpers
+
+    def print_header(self):
+        header = '''\
 ===========
 BPF-HELPERS
 ===========
@@ -264,6 +348,7 @@ kernel at the top).
 HELPERS
 =======
 '''
+        PrinterRST.print_license(self)
         print(header)
 
     def print_footer(self):
@@ -380,27 +465,50 @@ SEE ALSO
 
     def print_one(self, helper):
         self.print_proto(helper)
+        self.print_elem(helper)
 
-        if (helper.desc):
-            print('\tDescription')
-            # Do not strip all newline characters: formatted code at the end of
-            # a section must be followed by a blank line.
-            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
-                print('{}{}'.format('\t\t' if line else '', line))
 
-        if (helper.ret):
-            print('\tReturn')
-            for line in helper.ret.rstrip().split('\n'):
-                print('{}{}'.format('\t\t' if line else '', line))
+class PrinterSyscallRST(PrinterRST):
+    """
+    A printer for dumping collected information about the syscall API as a
+    ReStructured Text page compatible with the rst2man program, which can be
+    used to generate a manual page for the syscall.
+    @parser: A HeaderParser with APIElement objects to print to standard
+             output
+    """
+    def __init__(self, parser):
+        self.elements = parser.commands
+
+    def print_header(self):
+        header = '''\
+===
+bpf
+===
+-------------------------------------------------------------------------------
+Perform a command on an extended BPF object
+-------------------------------------------------------------------------------
+
+:Manual section: 2
+
+COMMANDS
+========
+'''
+        PrinterRST.print_license(self)
+        print(header)
+
+    def print_one(self, command):
+        print('**%s**' % (command.proto))
+        self.print_elem(command)
 
-        print('')
 
 class PrinterHelpers(Printer):
     """
     A printer for dumping collected information about helpers as C header to
     be included from BPF program.
-    @helpers: array of Helper objects to print to standard output
+    @parser: A HeaderParser with Helper objects to print to standard output
     """
+    def __init__(self, parser):
+        self.elements = parser.helpers
 
     type_fwds = [
             'struct bpf_fib_lookup',
@@ -511,7 +619,7 @@ class PrinterHelpers(Printer):
 
     def print_header(self):
         header = '''\
-/* This is auto-generated file. See bpf_helpers_doc.py for details. */
+/* This is auto-generated file. See bpf_doc.py for details. */
 
 /* Forward declarations of BPF structs */'''
 
@@ -589,8 +697,13 @@ script = os.path.abspath(sys.argv[0])
 linuxRoot = os.path.dirname(os.path.dirname(script))
 bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
 
+printers = {
+        'helpers': PrinterHelpersRST,
+        'syscall': PrinterSyscallRST,
+}
+
 argParser = argparse.ArgumentParser(description="""
-Parse eBPF header file and generate documentation for eBPF helper functions.
+Parse eBPF header file and generate documentation for the eBPF API.
 The RST-formatted output produced can be turned into a manual page with the
 rst2man utility.
 """)
@@ -601,6 +714,8 @@ if (os.path.isfile(bpfh)):
                            default=bpfh)
 else:
     argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
+argParser.add_argument('target', nargs='?', default='helpers',
+                       choices=printers.keys(), help='eBPF API target')
 args = argParser.parse_args()
 
 # Parse file.
@@ -609,7 +724,9 @@ headerParser.run()
 
 # Print formatted output to standard output.
 if args.header:
-    printer = PrinterHelpers(headerParser.helpers)
+    if args.target != 'helpers':
+        raise NotImplementedError('Only helpers header generation is supported')
+    printer = PrinterHelpers(headerParser)
 else:
-    printer = PrinterRST(headerParser.helpers)
+    printer = printers[args.target](headerParser)
 printer.print_all()
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index b69231918686..d59276f48d4f 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -88,6 +88,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_NEWVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_DELVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_GETVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_NEWNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -171,7 +174,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 		 * structures at the top of this file with the new mappings
 		 * before updating the BUILD_BUG_ON() macro!
 		 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
deleted file mode 100644
index 854d084026dd..000000000000
--- a/tools/bpf/Makefile.helpers
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ifndef allow-override
-  include ../scripts/Makefile.include
-  include ../scripts/utilities.mak
-else
-  # Assume Makefile.helpers is being run from bpftool/Documentation
-  # subdirectory. Go up two more directories to fetch bpf.h header and
-  # associated script.
-  UP2DIR := ../../
-endif
-
-INSTALL ?= install
-RM ?= rm -f
-RMDIR ?= rmdir --ignore-fail-on-non-empty
-
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
-prefix ?= /usr/local
-mandir ?= $(prefix)/man
-man7dir = $(mandir)/man7
-
-HELPERS_RST = bpf-helpers.rst
-MAN7_RST = $(HELPERS_RST)
-
-_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
-DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
-
-helpers: man7
-man7: $(DOC_MAN7)
-
-RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
-
-$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
-	$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
-
-$(OUTPUT)%.7: $(OUTPUT)%.rst
-ifndef RST2MAN_DEP
-	$(error "rst2man not found, but required to generate man pages")
-endif
-	$(QUIET_GEN)rst2man $< > $@
-
-helpers-clean:
-	$(call QUIET_CLEAN, eBPF_helpers-manpage)
-	$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
-
-helpers-install: helpers
-	$(call QUIET_INSTALL, eBPF_helpers-manpage)
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
-	$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
-
-helpers-uninstall:
-	$(call QUIET_UNINST, eBPF_helpers-manpage)
-	$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
-	$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
-
-.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index a07dfc479270..00e560a17baf 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
 		else
 			return CMD_OK;
 		bpf_reset();
-	} while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+	} while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
 
 	rl_printf("bpf passes:%u fails:%u\n", pass, fail);
 
diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
index 8d48e896be50..dfb7254a24e8 100644
--- a/tools/bpf/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
@@ -185,13 +185,13 @@ ldx
 	| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
 		if ($2 != 4 || $9 != 0xf) {
 			fprintf(stderr, "ldxb offset not supported!\n");
-			exit(0);
+			exit(1);
 		} else {
 			bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
 	| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
 		if ($2 != 4 || $9 != 0xf) {
 			fprintf(stderr, "ldxb offset not supported!\n");
-			exit(0);
+			exit(1);
 		} else {
 			bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
 	;
@@ -472,7 +472,7 @@ static void bpf_assert_max(void)
 {
 	if (curr_instr >= BPF_MAXINSNS) {
 		fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
-		exit(0);
+		exit(1);
 	}
 }
 
@@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
 
 	if (ret == -ENOENT) {
 		fprintf(stderr, "no such label \'%s\'!\n", label);
-		exit(0);
+		exit(1);
 	}
 
 	return ret;
@@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
 {
 	int delta = off - i - 1;
 
-	if (delta < 0 || delta > 255)
-		fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
+	if (delta < 0 || delta > 255) {
+		fprintf(stderr, "error: insn #%d jumps to insn #%d, "
 				"which is out of range\n", i, off);
+		exit(1);
+	}
 	return (uint8_t) delta;
 }
 
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 944cb4b7c95d..05ce4446b780 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,7 +3,6 @@
 /bootstrap/
 /bpftool
 bpftool*.8
-bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index f33cb02de95c..c49487905ceb 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -16,15 +16,12 @@ prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
 
-# Load targets for building eBPF helpers man page.
-include ../../Makefile.helpers
-
 MAN8_RST = $(wildcard bpftool*.rst)
 
 _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
 DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
 
-man: man8 helpers
+man: man8
 man8: $(DOC_MAN8)
 
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
 endif
 	$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
 
-clean: helpers-clean
+clean:
 	$(call QUIET_CLEAN, Documentation)
 	$(Q)$(RM) $(DOC_MAN8)
 
-install: man helpers-install
+install: man
 	$(call QUIET_INSTALL, Documentation-man)
 	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
 	$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
-uninstall: helpers-uninstall
+uninstall:
 	$(call QUIET_UNINST, Documentation-man)
 	$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
 	$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fe9e7b3a4b50..985610c3f193 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 struct btf_attach_table {
@@ -327,6 +328,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
 			jsonw_end_array(w);
 		break;
 	}
+	case BTF_KIND_FLOAT: {
+		if (json_output)
+			jsonw_uint_field(w, "size", t->size);
+		else
+			printf(" size=%u", t->size);
+		break;
+	}
 	default:
 		break;
 	}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 0e9310727281..7ca54d046362 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
 	switch (BTF_INFO_KIND(t->info)) {
 	case BTF_KIND_INT:
 	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FLOAT:
 		BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
 		break;
 	case BTF_KIND_STRUCT:
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 359960a8f1de..40a88df275f9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
 		{ "CONFIG_BPF_JIT", },
 		/* Avoid compiling eBPF interpreter (use JIT only) */
 		{ "CONFIG_BPF_JIT_ALWAYS_ON", },
+		/* Kernel BTF debug information available */
+		{ "CONFIG_DEBUG_INFO_BTF", },
+		/* Kernel module BTF debug information available */
+		{ "CONFIG_DEBUG_INFO_BTF_MODULES", },
 
 		/* cgroups */
 		{ "CONFIG_CGROUPS", },
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 8608cd68cdd0..6fc3e6f7f40c 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
 	else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+	else if (insn->src_reg == BPF_PSEUDO_FUNC)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "subprog[%+d]", insn->imm);
 	else
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "0x%llx", (unsigned long long)full_imm);
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 9d9fb6209be1..3818ec511fd2 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -16,7 +16,10 @@ CFLAGS := -g -Wall
 
 # Try to detect best kernel BTF source
 KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)		\
+	$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+	../../../vmlinux /sys/kernel/btf/vmlinux	\
+	/boot/vmlinux-$(KERNEL_REL)
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 					  $(wildcard $(VMLINUX_BTF_PATHS))))
 
@@ -66,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
 		echo "Couldn't find kernel BTF; set VMLINUX_BTF to"	       \
 			"specify its location." >&2;			       \
 		exit 1;\
 	fi
 	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+else
+	$(Q)cp "$(VMLINUX_H)" $@
+endif
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f044..645530ca7e98 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
 const volatile pid_t targ_pid = 0;
 
 struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__uint(max_entries, 10240);
-	__type(key, u32);
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
 	__type(value, u64);
 } start SEC(".maps");
 
@@ -25,15 +25,20 @@ struct {
 
 /* record enqueue timestamp */
 __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
 {
-	u64 ts;
+	u32 pid = t->pid;
+	u64 *ptr;
 
 	if (!pid || (targ_pid && targ_pid != pid))
 		return 0;
 
-	ts = bpf_ktime_get_ns();
-	bpf_map_update_elem(&start, &pid, &ts, 0);
+	ptr = bpf_task_storage_get(&start, t, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	*ptr = bpf_ktime_get_ns();
 	return 0;
 }
 
@@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
 
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 
 SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
 
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 
 SEC("tp_btf/sched_switch")
@@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
 
 	/* ivcsw: treat like an enqueue event and store timestamp */
 	if (prev->state == TASK_RUNNING)
-		trace_enqueue(prev->tgid, prev->pid);
+		trace_enqueue(prev);
 
 	pid = next->pid;
 
+	/* For pid mismatch, save a bpf_task_storage_get */
+	if (!pid || (targ_pid && targ_pid != pid))
+		return 0;
+
 	/* fetch timestamp and calculate delta */
-	tsp = bpf_map_lookup_elem(&start, &pid);
+	tsp = bpf_task_storage_get(&start, next, 0, 0);
 	if (!tsp)
 		return 0;   /* missed enqueue */
 
@@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 			      &event, sizeof(event));
 
-	bpf_map_delete_elem(&start, &pid);
+	bpf_task_storage_delete(&start, next);
 	return 0;
 }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 79c893310492..2d3036e292a9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
 	} map;
 };
 
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ *	Description
+ *		Create a map and return a file descriptor that refers to the
+ *		map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ *		is automatically enabled for the new file descriptor.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ *	Description
+ *		Look up an element with a given *key* in the map referred to
+ *		by the file descriptor *map_fd*.
+ *
+ *		The *flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ *	Description
+ *		Create or update an element (key/value pair) in a specified map.
+ *
+ *		The *flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create a new element or update an existing element.
+ *		**BPF_NOEXIST**
+ *			Create a new element only if it did not exist.
+ *		**BPF_EXIST**
+ *			Update an existing element.
+ *		**BPF_F_LOCK**
+ *			Update a spin_lock-ed map element.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ *		**E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ *		**E2BIG**
+ *			The number of elements in the map reached the
+ *			*max_entries* limit specified at map creation time.
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ *	Description
+ *		Look up and delete an element by key in a specified map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ *	Description
+ *		Look up an element by key in a specified map and return the key
+ *		of the next element. Can be used to iterate over all elements
+ *		in the map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		The following cases can be used to iterate over all elements of
+ *		the map:
+ *
+ *		* If *key* is not found, the operation returns zero and sets
+ *		  the *next_key* pointer to the key of the first element.
+ *		* If *key* is found, the operation returns zero and sets the
+ *		  *next_key* pointer to the key of the next element.
+ *		* If *key* is the last element, returns -1 and *errno* is set
+ *		  to **ENOENT**.
+ *
+ *		May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ *		**EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ *	Description
+ *		Verify and load an eBPF program, returning a new file
+ *		descriptor associated with the program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ *		The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ *		automatically enabled for the new file descriptor.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ *	Description
+ *		Pin an eBPF program or map referred by the specified *bpf_fd*
+ *		to the provided *pathname* on the filesystem.
+ *
+ *		The *pathname* argument must not contain a dot (".").
+ *
+ *		On success, *pathname* retains a reference to the eBPF object,
+ *		preventing deallocation of the object when the original
+ *		*bpf_fd* is closed. This allow the eBPF object to live beyond
+ *		**close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ *		process.
+ *
+ *		Applying **unlink**\ (2) or similar calls to the *pathname*
+ *		unpins the object from the filesystem, removing the reference.
+ *		If no other file descriptors or filesystem nodes refer to the
+ *		same object, it will be deallocated (see NOTES).
+ *
+ *		The filesystem type for the parent directory of *pathname* must
+ *		be **BPF_FS_MAGIC**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_OBJ_GET
+ *	Description
+ *		Open a file descriptor for the eBPF object pinned to the
+ *		specified *pathname*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook.
+ *
+ *		The *attach_type* specifies the eBPF attachment point to
+ *		attach the program to, and must be one of *bpf_attach_type*
+ *		(see below).
+ *
+ *		The *attach_bpf_fd* must be a valid file descriptor for a
+ *		loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ *		or sock_ops type corresponding to the specified *attach_type*.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_TYPE_SK_SKB**,
+ *		**BPF_PROG_TYPE_SK_MSG**
+ *
+ *			eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ *	Description
+ *		Detach the eBPF program associated with the *target_fd* at the
+ *		hook specified by *attach_type*. The program must have been
+ *		previously attached using **BPF_PROG_ATTACH**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ *	Description
+ *		Run the eBPF program associated with the *prog_fd* a *repeat*
+ *		number of times against a provided program context *ctx_in* and
+ *		data *data_in*, and return the modified program context
+ *		*ctx_out*, *data_out* (for example, packet data), result of the
+ *		execution *retval*, and *duration* of the test run.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		**ENOSPC**
+ *			Either *data_size_out* or *ctx_size_out* is too small.
+ *		**ENOTSUPP**
+ *			This command is not supported by the program type of
+ *			the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF program currently loaded into the kernel.
+ *
+ *		Looks for the eBPF program with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF programs
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF map currently loaded into the kernel.
+ *
+ *		Looks for the eBPF map with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF maps
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF program corresponding to
+ *		*prog_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF map corresponding to
+ *		*map_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ *	Description
+ *		Obtain information about the eBPF object corresponding to
+ *		*bpf_fd*.
+ *
+ *		Populates up to *info_len* bytes of *info*, which will be in
+ *		one of the following formats depending on the eBPF object type
+ *		of *bpf_fd*:
+ *
+ *		* **struct bpf_prog_info**
+ *		* **struct bpf_map_info**
+ *		* **struct bpf_btf_info**
+ *		* **struct bpf_link_info**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		specified *attach_type* hook.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_QUERY** always fetches the number of programs
+ *		attached and the *attach_flags* which were used to attach those
+ *		programs. Additionally, if *prog_ids* is nonzero and the number
+ *		of attached programs is less than *prog_cnt*, populates
+ *		*prog_ids* with the eBPF program ids of the programs attached
+ *		at *target_fd*.
+ *
+ *		The following flags may alter the result:
+ *
+ *		**BPF_F_QUERY_EFFECTIVE**
+ *			Only return information regarding programs which are
+ *			currently effective at the specified *target_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ *	Description
+ *		Attach an eBPF program to a tracepoint *name* to access kernel
+ *		internal arguments of the tracepoint in their raw form.
+ *
+ *		The *prog_fd* must be a valid file descriptor associated with
+ *		a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ *		No ABI guarantees are made about the content of tracepoint
+ *		arguments exposed to the corresponding eBPF program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ *	Description
+ *		Verify and load BPF Type Format (BTF) metadata into the kernel,
+ *		returning a new file descriptor associated with the metadata.
+ *		BTF is described in more detail at
+ *		https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ *		The *btf* parameter must point to valid memory providing
+ *		*btf_size* bytes of BTF binary metadata.
+ *
+ *		The returned file descriptor can be passed to other **bpf**\ ()
+ *		subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ *		associate the BTF with those objects.
+ *
+ *		Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ *		parameters to specify a *btf_log_buf*, *btf_log_size* and
+ *		*btf_log_level* which allow the kernel to return freeform log
+ *		output regarding the BTF verification process.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the BPF Type Format (BTF)
+ *		corresponding to *btf_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		target process identified by *pid* and *fd*.
+ *
+ *		If the *pid* and *fd* are associated with a tracepoint, kprobe
+ *		or uprobe perf event, then the *prog_id* and *fd_type* will
+ *		be populated with the eBPF program id and file descriptor type
+ *		of type **bpf_task_fd_type**. If associated with a kprobe or
+ *		uprobe, the  *probe_offset* and *probe_addr* will also be
+ *		populated. Optionally, if *buf* is provided, then up to
+ *		*buf_len* bytes of *buf* will be populated with the name of
+ *		the tracepoint, kprobe or uprobe.
+ *
+ *		The resulting *prog_id* may be introspected in deeper detail
+ *		using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ *	Description
+ *		Look up an element with the given *key* in the map referred to
+ *		by the file descriptor *fd*, and if found, delete the element.
+ *
+ *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ *		implement this command as a "pop" operation, deleting the top
+ *		element rather than one corresponding to *key*.
+ *		The *key* and *key_len* parameters should be zeroed when
+ *		issuing this operation for these map types.
+ *
+ *		This command is only valid for the following map types:
+ *		* **BPF_MAP_TYPE_QUEUE**
+ *		* **BPF_MAP_TYPE_STACK**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ *	Description
+ *		Freeze the permissions of the specified map.
+ *
+ *		Write permissions may be frozen by passing zero *flags*.
+ *		Upon success, no future syscall invocations may alter the
+ *		map state of *map_fd*. Write operations from eBPF programs
+ *		are still possible for a frozen map.
+ *
+ *		Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ *	Description
+ *		Fetch the next BPF Type Format (BTF) object currently loaded
+ *		into the kernel.
+ *
+ *		Looks for the BTF object with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other BTF objects
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ *	Description
+ *		Iterate and fetch multiple elements in a map.
+ *
+ *		Two opaque values are used to manage batch operations,
+ *		*in_batch* and *out_batch*. Initially, *in_batch* must be set
+ *		to NULL to begin the batched operation. After each subsequent
+ *		**BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ *		*out_batch* as the *in_batch* for the next operation to
+ *		continue iteration from the current point.
+ *
+ *		The *keys* and *values* are output parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are copied into the
+ *		user buffer, with the keys copied into *keys* and the values
+ *		copied into the corresponding indices in *values*.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **ENOSPC** to indicate that *keys* or
+ *		*values* is too small to dump an entire bucket during
+ *		iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ *	Description
+ *		Iterate and delete all elements in a map.
+ *
+ *		This operation has the same behavior as
+ *		**BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ *		* Every element that is successfully returned is also deleted
+ *		  from the map. This is at least *count* elements. Note that
+ *		  *count* is both an input and an output parameter.
+ *		* Upon returning with *errno* set to **EFAULT**, up to
+ *		  *count* elements may be deleted without returning the keys
+ *		  and values of the deleted elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ *	Description
+ *		Update multiple elements in a map by *key*.
+ *
+ *		The *keys* and *values* are input parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially updated to the
+ *		value in the corresponding index in *values*. The *in_batch*
+ *		and *out_batch* parameters are ignored and should be zeroed.
+ *
+ *		The *elem_flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create new elements or update a existing elements.
+ *		**BPF_NOEXIST**
+ *			Create new elements only if they do not exist.
+ *		**BPF_EXIST**
+ *			Update existing elements.
+ *		**BPF_F_LOCK**
+ *			Update spin_lock-ed map elements. This must be
+ *			specified if the map value contains a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ *		**E2BIG**. **E2BIG** indicates that the number of elements in
+ *		the map reached the *max_entries* limit specified at map
+ *		creation time.
+ *
+ *		May set *errno* to one of the following error codes under
+ *		specific circumstances:
+ *
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ *	Description
+ *		Delete multiple elements in a map by *key*.
+ *
+ *		The *keys* parameter is an input parameter which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially deleted. The
+ *		*in_batch*, *out_batch*, and *values* parameters are ignored
+ *		and should be zeroed.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements. If
+ *		*errno* is **EFAULT**, up to *count* elements may be been
+ *		deleted.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook and return a file descriptor handle for
+ *		managing the link.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ *	Description
+ *		Update the eBPF program in the specified *link_fd* to
+ *		*new_prog_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF Link corresponding to
+ *		*link_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF link currently loaded into the kernel.
+ *
+ *		Looks for the eBPF link with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF links
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ *	Description
+ *		Enable eBPF runtime statistics gathering.
+ *
+ *		Runtime statistics gathering for the eBPF runtime is disabled
+ *		by default to minimize the corresponding performance overhead.
+ *		This command enables statistics globally.
+ *
+ *		Multiple programs may independently enable statistics.
+ *		After gathering the desired statistics, eBPF runtime statistics
+ *		may be disabled again by calling **close**\ (2) for the file
+ *		descriptor returned by this function. Statistics will only be
+ *		disabled system-wide when all outstanding file descriptors
+ *		returned by prior calls for this subcommand are closed.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ *	Description
+ *		Create an iterator on top of the specified *link_fd* (as
+ *		previously created using **BPF_LINK_CREATE**) and return a
+ *		file descriptor that can be used to trigger the iteration.
+ *
+ *		If the resulting file descriptor is pinned to the filesystem
+ *		using  **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ *		for that path will trigger the iterator to read kernel state
+ *		using the eBPF program attached to *link_fd*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ *	Description
+ *		Forcefully detach the specified *link_fd* from its
+ *		corresponding attachment point.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ *	Description
+ *		Bind a map to the lifetime of an eBPF program.
+ *
+ *		The map identified by *map_fd* is bound to the program
+ *		identified by *prog_fd* and only released when *prog_fd* is
+ *		released. This may be used in cases where metadata should be
+ *		associated with a program which otherwise does not contain any
+ *		references to the map (for example, embedded in the eBPF
+ *		program instructions).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * NOTES
+ *	eBPF objects (maps and programs) can be shared between processes.
+ *
+ *	* After **fork**\ (2), the child inherits file descriptors
+ *	  referring to the same eBPF objects.
+ *	* File descriptors referring to eBPF objects can be transferred over
+ *	  **unix**\ (7) domain sockets.
+ *	* File descriptors referring to eBPF objects can be duplicated in the
+ *	  usual way, using **dup**\ (2) and similar calls.
+ *	* File descriptors referring to eBPF objects can be pinned to the
+ *	  filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ *	An eBPF object is deallocated only after all file descriptors referring
+ *	to the object have been closed and no references remain pinned to the
+ *	filesystem or attached (for example, bound to a program or device).
+ */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
 	BPF_MAP_LOOKUP_ELEM,
@@ -393,6 +1103,15 @@ enum bpf_link_type {
  *                   is struct/union.
  */
 #define BPF_PSEUDO_BTF_ID	3
+/* insn[0].src_reg:  BPF_PSEUDO_FUNC
+ * insn[0].imm:      insn offset to the func
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the function
+ * verifier type:    PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC		4
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
@@ -720,7 +1439,7 @@ union bpf_attr {
  * parsed and used to produce a manual page. The workflow is the following,
  * and requires the rst2man utility:
  *
- *     $ ./scripts/bpf_helpers_doc.py \
+ *     $ ./scripts/bpf_doc.py \
  *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
  *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
  *     $ man /tmp/bpf-helpers.7
@@ -1765,6 +2484,10 @@ union bpf_attr {
  *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
  *		  type; *len* is the length of the inner MAC header.
  *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *		  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *		  L2 type as Ethernet.
+ *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
  * 		previously done by the verifier are invalidated and must be
@@ -3909,6 +4632,34 @@ union bpf_attr {
  *		* **BPF_MTU_CHK_RET_FRAG_NEEDED**
  *		* **BPF_MTU_CHK_RET_SEGS_TOOBIG**
  *
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For each element in **map**, call **callback_fn** function with
+ *		**map**, **callback_ctx** and other map-specific parameters.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0.
+ *
+ *		The following are a list of supported map types and their
+ *		respective expected callback signatures:
+ *
+ *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ *		For per_cpu maps, the map_value is the value on the cpu where the
+ *		bpf_prog is running.
+ *
+ *		If **callback_fn** return 0, the helper will continue to the next
+ *		element. If return value is 1, the helper will skip the rest of
+ *		elements and return. Other return values are not used now.
+ *
+ *	Return
+ *		The number of traversed map elements for success, **-EINVAL** for
+ *		invalid **flags**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4075,6 +4826,7 @@ union bpf_attr {
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
+	FN(for_each_map_elem),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4168,6 +4920,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
 	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
+	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 };
 
 enum {
@@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
 
 /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
 struct bpf_sk_lookup {
-	__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+	union {
+		__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+		__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+	};
 
 	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
 	};
 };
 
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
@@ -72,7 +72,8 @@ struct btf_type {
 #define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
 #define BTF_KIND_VAR		14	/* Variable	*/
 #define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_MAX		BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT		16	/* Floating point	*/
+#define BTF_KIND_MAX		BTF_KIND_FLOAT
 #define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e9eb6a6e80d2..87b14b74d62f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
 $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
-	$(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+	$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
 		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d9c10830d749..3aa58f2ac183 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -291,6 +291,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return base_size;
 	case BTF_KIND_INT:
 		return base_size + sizeof(__u32);
@@ -338,6 +339,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return 0;
 	case BTF_KIND_INT:
 		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -578,6 +580,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_UNION:
 		case BTF_KIND_ENUM:
 		case BTF_KIND_DATASEC:
+		case BTF_KIND_FLOAT:
 			size = t->size;
 			goto done;
 		case BTF_KIND_PTR:
@@ -621,6 +624,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
 	switch (kind) {
 	case BTF_KIND_INT:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_FLOAT:
 		return min(btf_ptr_sz(btf), (size_t)t->size);
 	case BTF_KIND_PTR:
 		return btf_ptr_sz(btf);
@@ -1756,6 +1760,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 	return btf_commit_type(btf, sz);
 }
 
+/*
+ * Append new BTF_KIND_FLOAT type with:
+ *   - *name* - non-empty, non-NULL type name;
+ *   - *sz* - size of the type, in bytes;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
+{
+	struct btf_type *t;
+	int sz, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+
+	/* byte_sz must be one of the explicitly allowed values */
+	if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
+	    byte_sz != 16)
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
+	t->size = byte_sz;
+
+	return btf_commit_type(btf, sz);
+}
+
 /* it's completely legal to append BTF types with type IDs pointing forward to
  * types that haven't been appended yet, so we only make sure that id looks
  * sane, we can't guarantee that ID will always be valid
@@ -1883,7 +1928,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
  *   - *byte_sz* - size of the struct, in bytes;
  *
  * Struct initially has no fields in it. Fields can be added by
- * btf__add_field() right after btf__add_struct() succeeds. 
+ * btf__add_field() right after btf__add_struct() succeeds.
  *
  * Returns:
  *   - >0, type ID of newly added BTF type;
@@ -3626,6 +3671,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
 		case BTF_KIND_FWD:
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_FUNC:
+		case BTF_KIND_FLOAT:
 			h = btf_hash_common(t);
 			break;
 		case BTF_KIND_INT:
@@ -3722,6 +3768,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 		break;
 
 	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
 		h = btf_hash_common(t);
 		for_each_dedup_cand(d, hash_entry, h) {
 			cand_id = (__u32)(long)hash_entry->value;
@@ -3983,6 +4030,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 			return btf_compat_enum(cand_type, canon_type);
 
 	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
 		return btf_equal_common(cand_type, canon_type);
 
 	case BTF_KIND_CONST:
@@ -4479,6 +4527,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
 	switch (btf_kind(t)) {
 	case BTF_KIND_INT:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_FLOAT:
 		break;
 
 	case BTF_KIND_FWD:
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1237bcd1dd17..029a9cfc8c2d 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -95,6 +95,7 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
 
 LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
 LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_array(struct btf *btf,
 			      int index_type_id, int elem_type_id, __u32 nr_elems);
@@ -294,6 +295,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_DATASEC;
 }
 
+static inline bool btf_is_float(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_FLOAT;
+}
+
 static inline __u8 btf_int_encoding(const struct btf_type *t)
 {
 	return BTF_INT_ENCODING(*(__u32 *)(t + 1));
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 0911aea4cdbe..7b53a484860f 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
 		case BTF_KIND_INT:
 		case BTF_KIND_ENUM:
 		case BTF_KIND_FWD:
+		case BTF_KIND_FLOAT:
 			break;
 
 		case BTF_KIND_VOLATILE:
@@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 
 	switch (btf_kind(t)) {
 	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
 		tstate->order_state = ORDERED;
 		return 0;
 
@@ -1133,6 +1135,7 @@ skip_mod:
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
 		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FLOAT:
 			goto done;
 		default:
 			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 
 		switch (kind) {
 		case BTF_KIND_INT:
+		case BTF_KIND_FLOAT:
 			btf_dump_emit_mods(d, decls);
 			name = btf_name_of(d, t->name_off);
 			btf_dump_printf(d, "%s", name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4181d178ee7b..8d610259f4be 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -178,6 +178,8 @@ enum kern_feature_id {
 	FEAT_PROG_BIND_MAP,
 	/* Kernel support for module BTFs */
 	FEAT_MODULE_BTF,
+	/* BTF_KIND_FLOAT support */
+	FEAT_BTF_FLOAT,
 	__FEAT_CNT,
 };
 
@@ -188,6 +190,7 @@ enum reloc_type {
 	RELO_CALL,
 	RELO_DATA,
 	RELO_EXTERN,
+	RELO_SUBPROG_ADDR,
 };
 
 struct reloc_desc {
@@ -574,6 +577,16 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
 	       insn->off == 0;
 }
 
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+	return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 static int
 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		      const char *name, size_t sec_idx, const char *sec_name,
@@ -1936,6 +1949,7 @@ static const char *btf_kind_str(const struct btf_type *t)
 	case BTF_KIND_FUNC_PROTO: return "func_proto";
 	case BTF_KIND_VAR: return "var";
 	case BTF_KIND_DATASEC: return "datasec";
+	case BTF_KIND_FLOAT: return "float";
 	default: return "unknown";
 	}
 }
@@ -2385,15 +2399,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
 {
 	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(FEAT_BTF_FUNC);
 
-	return !has_func || !has_datasec || !has_func_global;
+	return !has_func || !has_datasec || !has_func_global || !has_float;
 }
 
 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 {
 	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(FEAT_BTF_FUNC);
 	struct btf_type *t;
 	int i, j, vlen;
@@ -2446,6 +2462,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 		} else if (!has_func_global && btf_is_func(t)) {
 			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
 			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+		} else if (!has_float && btf_is_float(t)) {
+			/* replace FLOAT with an equally-sized empty STRUCT;
+			 * since C compilers do not accept e.g. "float" as a
+			 * valid struct name, make it anonymous
+			 */
+			t->name_off = 0;
+			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
 		}
 	}
 }
@@ -2975,6 +2998,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
 	       GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
 }
 
+static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
+{
+	int bind = GELF_ST_BIND(sym->st_info);
+	int type = GELF_ST_TYPE(sym->st_info);
+
+	/* in .text section */
+	if (sym->st_shndx != text_shndx)
+		return false;
+
+	/* local function */
+	if (bind == STB_LOCAL && type == STT_SECTION)
+		return true;
+
+	/* global function */
+	return bind == STB_GLOBAL && type == STT_FUNC;
+}
+
 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 {
 	const struct btf_type *t;
@@ -3396,7 +3436,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return 0;
 	}
 
-	if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+	if (!is_ldimm64(insn)) {
 		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 			prog->name, sym_name, insn_idx, insn->code);
 		return -LIBBPF_ERRNO__RELOC;
@@ -3431,6 +3471,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return -LIBBPF_ERRNO__RELOC;
 	}
 
+	/* loading subprog addresses */
+	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
+		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
+		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
+		 */
+		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
+			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		reloc_desc->type = RELO_SUBPROG_ADDR;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = sym->st_value;
+		return 0;
+	}
+
 	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 
@@ -3883,6 +3940,18 @@ static int probe_kern_btf_datasec(void)
 					     strs, sizeof(strs)));
 }
 
+static int probe_kern_btf_float(void)
+{
+	static const char strs[] = "\0float";
+	__u32 types[] = {
+		/* float */
+		BTF_TYPE_FLOAT_ENC(1, 4),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
 static int probe_kern_array_mmap(void)
 {
 	struct bpf_create_map_attr attr = {
@@ -4062,6 +4131,9 @@ static struct kern_feature_desc {
 	[FEAT_MODULE_BTF] = {
 		"module BTF support", probe_module_btf,
 	},
+	[FEAT_BTF_FLOAT] = {
+		"BTF_KIND_FLOAT support", probe_kern_btf_float,
+	},
 };
 
 static bool kernel_supports(enum kern_feature_id feat_id)
@@ -5567,11 +5639,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
 	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
 }
 
-static bool is_ldimm64(struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
 {
 	switch (BPF_SIZE(insn->code)) {
@@ -6173,6 +6240,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			}
 			relo->processed = true;
 			break;
+		case RELO_SUBPROG_ADDR:
+			insn[0].src_reg = BPF_PSEUDO_FUNC;
+			/* will be handled as a follow up pass */
+			break;
 		case RELO_CALL:
 			/* will be handled as a follow up pass */
 			break;
@@ -6359,11 +6430,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 
 	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
 		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
-		if (!insn_is_subprog_call(insn))
+		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
 			continue;
 
 		relo = find_prog_insn_relo(prog, insn_idx);
-		if (relo && relo->type != RELO_CALL) {
+		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
 			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
 				prog->name, insn_idx, relo->type);
 			return -LIBBPF_ERRNO__RELOC;
@@ -6375,8 +6446,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 			 * call always has imm = -1, but for static functions
 			 * relocation is against STT_SECTION and insn->imm
 			 * points to a start of a static function
+			 *
+			 * for subprog addr relocation, the relo->sym_off + insn->imm is
+			 * the byte offset in the corresponding section.
+			 */
+			if (relo->type == RELO_CALL)
+				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+			else
+				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
+		} else if (insn_is_pseudo_func(insn)) {
+			/*
+			 * RELO_SUBPROG_ADDR relo is always emitted even if both
+			 * functions are in the same section, so it shouldn't reach here.
 			 */
-			sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+				prog->name, insn_idx);
+			return -LIBBPF_ERRNO__RELOC;
 		} else {
 			/* if subprogram call is to a static function within
 			 * the same ELF section, there won't be any relocation
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1c0fd2dd233a..ec898f464ab9 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -350,3 +350,8 @@ LIBBPF_0.3.0 {
 		xsk_setup_xdp_prog;
 		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
+
+LIBBPF_0.4.0 {
+	global:
+		btf__add_float;
+} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 969d0ac592ba..343f6eb05637 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -31,6 +31,8 @@
 #define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
 #define BTF_PARAM_ENC(name, type) (name), (type)
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
 
 #ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 59c779c5790c..cfbcfc063c81 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -5,6 +5,7 @@
 #define __LIBBPF_LIBBPF_UTIL_H
 
 #include <stdbool.h>
+#include <linux/compiler.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -15,29 +16,56 @@ extern "C" {
  * application that uses libbpf.
  */
 #if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
-	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile("" : : : "memory");			\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile("" : : : "memory");			\
+		___p1;							\
+	})
 #elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
+# define libbpf_smp_store_release(p, v)					\
+		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1;					\
+		asm volatile ("ldar %w0, %1"				\
+			      : "=r" (___p1) : "Q" (*p) : "memory");	\
+		___p1;							\
+	})
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile ("fence rw,w" : : : "memory");		\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile ("fence r,rw" : : : "memory");		\
+		___p1;							\
+	})
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v)					\
+	do {								\
+		__sync_synchronize();					\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		__sync_synchronize();					\
+		___p1;							\
+	})
 #endif
 
 #ifdef __cplusplus
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..a9fdea87b5cd 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 	 * this function. Without this optimization it whould have been
 	 * free_entries = r->cached_prod - r->cached_cons + r->size.
 	 */
-	r->cached_cons = *r->consumer + r->size;
+	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+	r->cached_cons += r->size;
 
 	return r->cached_cons - r->cached_prod;
 }
@@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	__u32 entries = r->cached_prod - r->cached_cons;
 
 	if (entries == 0) {
-		r->cached_prod = *r->producer;
+		r->cached_prod = libbpf_smp_load_acquire(r->producer);
 		entries = r->cached_prod - r->cached_cons;
 	}
 
@@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	libbpf_smp_wmb();
-
-	*prod->producer += nb;
+	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
 }
 
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
 	__u32 entries = xsk_cons_nb_avail(cons, nb);
 
 	if (entries > 0) {
-		/* Make sure we do not speculatively read the data before
-		 * we have received the packet buffers from the ring.
-		 */
-		libbpf_smp_rmb();
-
 		*idx = cons->cached_cons;
 		cons->cached_cons += entries;
 	}
@@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_rwmb();
+	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
 
-	*cons->consumer += nb;
 }
 
 static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5d7b947320fb..f05c4d48fd7e 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -20,4 +20,4 @@ tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
 tools/lib/zalloc.c
-scripts/bpf_helpers_doc.py
+scripts/bpf_doc.py
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c0c48fdb9ac1..4866f6a21901 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
+bpf-helpers*
+bpf-syscall*
 test_verifier
 test_maps
 test_lru_map
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 044bfdcf5b74..c3999587bc23 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -68,6 +68,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
+	test_doc_build.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
@@ -103,6 +104,7 @@ override define CLEAN
 	$(call msg,CLEAN)
 	$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 	$(Q)$(MAKE) -C bpf_testmod clean
+	$(Q)$(MAKE) docs-clean
 endef
 
 include ../lib.mk
@@ -180,6 +182,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
 		    cp $(SCRATCH_DIR)/runqslower $@
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
+$(TEST_GEN_FILES): docs
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -200,11 +203,16 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    CC=$(HOSTCC) LD=$(HOSTLD)				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
-	$(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
-	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	       \
-		    -C $(BPFTOOLDIR)/Documentation			       \
-		    OUTPUT=$(BUILD_DIR)/bpftool/Documentation/		       \
-		    prefix= DESTDIR=$(SCRATCH_DIR)/ install
+
+docs:
+	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	\
+	            -f Makefile.docs					\
+	            prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+	$(Q)$(MAKE) $(submake_extras)					\
+	            -f Makefile.docs					\
+	            prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
 
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
@@ -382,11 +390,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 	$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
 
-# only copy extra resources if in flavored build
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
 	$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
-	$(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
+	$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
 endif
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
@@ -476,3 +485,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
 	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+
+.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..ccf260021e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
+man7dir = $(mandir)/man7
+
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers syscall
+
+docs: $(DOCTARGETS)
+syscall: man2
+helpers: man7
+man2: $(DOC_MAN2)
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+	$$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+		--filename $$< > $$@
+
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+	$$(error "rst2man not found, but required to generate man pages")
+endif
+	$$(QUIET_GEN)rst2man $$< > $$@
+
+docs-clean-$1:
+	$$(call QUIET_CLEAN, eBPF_$1-manpage)
+	$(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
+
+docs-install-$1: docs
+	$$(call QUIET_INSTALL, eBPF_$1-manpage)
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+	$(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
+
+docs-uninstall-$1:
+	$$(call QUIET_UNINST, eBPF_$1-manpage)
+	$(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+	$(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
+
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..3464161c8eea 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
 
 __  https://reviews.llvm.org/D78466
 
+bpf_verif_scale/loop6.o test failure with Clang 12
+==================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+  * ``bpf_verif_scale/loop6.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+  R1 type=ctx expected=fp
+  The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+      15:       bc 51 00 00 00 00 00 00 w1 = w5
+      16:       04 01 00 00 ff ff ff ff w1 += -1
+      17:       67 05 00 00 20 00 00 00 r5 <<= 32
+      18:       77 05 00 00 20 00 00 00 r5 >>= 32
+      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+      20:       b7 05 00 00 06 00 00 00 r5 = 6
+  00000000000000a8 <LBB0_4>:
+      21:       b7 02 00 00 00 00 00 00 r2 = 0
+      22:       b7 01 00 00 00 00 00 00 r1 = 0
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__  https://reviews.llvm.org/D97479
+
 BPF CO-RE-based tests and Clang version
 =======================================
 
@@ -131,3 +170,12 @@ failures:
 .. _2: https://reviews.llvm.org/D85174
 .. _3: https://reviews.llvm.org/D83878
 .. _4: https://reviews.llvm.org/D83242
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__  https://reviews.llvm.org/D83289
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index 48f90490f922..b692e6ead9b5 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 static const char *btf_kind_str(__u16 kind)
@@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
 		}
 		break;
 	}
+	case BTF_KIND_FLOAT:
+		fprintf(out, " size=%u", t->size);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..9dc4e3dfbcf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,6 +2,44 @@
 #include <test_progs.h>
 #include "test_attach_probe.skel.h"
 
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK   0xffff0000UL
+#define LIS_R2          0x3c400000UL
+#define ADDIS_R2_R12    0x3c4c0000UL
+#define ADDI_R2_R2      0x38420000UL
+
+static ssize_t get_offset(ssize_t addr, ssize_t base)
+{
+	u32 *insn = (u32 *) addr;
+
+	/*
+	 * A PPC64 ABIv2 function may have a local and a global entry
+	 * point. We need to use the local entry point when patching
+	 * functions, so identify and step over the global entry point
+	 * sequence.
+	 *
+	 * The global entry point sequence is always of the form:
+	 *
+	 * addis r2,r12,XXXX
+	 * addi  r2,r2,XXXX
+	 *
+	 * A linker optimisation may convert the addis to lis:
+	 *
+	 * lis   r2,XXXX
+	 * addi  r2,r2,XXXX
+	 */
+	if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+	     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+	    ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+		return (ssize_t)(insn + 2) - base;
+	else
+		return addr - base;
+}
+#else
+#define get_offset(addr, base) (addr - base)
+#endif
+
 ssize_t get_base_addr() {
 	size_t start, offset;
 	char buf[256];
@@ -36,7 +74,7 @@ void test_attach_probe(void)
 	if (CHECK(base_addr < 0, "get_base_addr",
 		  "failed to find base addr: %zd", base_addr))
 		return;
-	uprobe_offset = (size_t)&get_base_addr - base_addr;
+	uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
 
 	skel = test_attach_probe__open_and_load();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..3d002c245d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
 		{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 		{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
 		{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
+		{ "loop6.o", BPF_PROG_TYPE_KPROBE },
 
 		/* partial unroll. 19k insn in a loop.
 		 * Total program size 20.8k insn.
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 6a7ee7420701..0457ae32b270 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
-		BTF_TYPE_ENC(0, 0x10000000, 4),
+		BTF_TYPE_ENC(0, 0x20000000, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "",
@@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
 	.max_entries = 1,
 },
 
+{
+	.descr = "float test #1, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),		/* [2] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [3] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 8),		/* [4] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 12),		/* [5] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 16),		/* [6] */
+		BTF_STRUCT_ENC(NAME_TBD, 5, 48),		/* [7] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+		BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+		BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+		BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+		    "\0floats\0a\0b\0c\0d\0e"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 1,
+	.value_type_id = 7,
+	.max_entries = 1,
+},
+{
+	.descr = "float test #2, invalid vlen",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+								/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "vlen != 0",
+},
+{
+	.descr = "float test #3, invalid kind_flag",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+								/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info kind_flag",
+},
+{
+	.descr = "float test #4, member does not fit",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [2] */
+		BTF_STRUCT_ENC(NAME_TBD, 1, 2),			/* [3] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float\0floats\0x"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+{
+	.descr = "float test #5, member is not properly aligned",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [2] */
+		BTF_STRUCT_ENC(NAME_TBD, 1, 8),			/* [3] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float\0floats\0x"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Member is not properly aligned",
+},
+{
+	.descr = "float test #6, invalid size",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 6),		/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 6,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_size",
+},
+
 }; /* struct btf_raw_test raw_tests[] */
 
 static const char *get_next_str(const char *start, const char *end)
@@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
 			/* int[16] */
 			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
 			/* struct s { */
-			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [3] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [3] */
 				BTF_MEMBER_ENC(NAME_NTH(3), 4, 0),	/* struct s *next;	*/
 				BTF_MEMBER_ENC(NAME_NTH(4), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(5), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(6), 1, 640),	/* int c;		*/
+				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
@@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
 			/* full copy of the above */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
 			BTF_TYPE_ARRAY_ENC(7, 7, 16),					/* [8] */
-			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [9] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [9] */
 				BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
 				BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
 				BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
 				BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
+				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
 			BTF_PTR_ENC(9),							/* [10] */
 			BTF_PTR_ENC(12),						/* [11] */
 			BTF_CONST_ENC(7),						/* [12] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [13] */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
 	},
 	.expect = {
 		.raw_types = {
 			/* int */
-			BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
 			/* int[16] */
 			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
 			/* struct s { */
-			BTF_STRUCT_ENC(NAME_NTH(6), 4, 84),				/* [3] */
-				BTF_MEMBER_ENC(NAME_NTH(5), 4, 0),	/* struct s *next;	*/
+			BTF_STRUCT_ENC(NAME_NTH(8), 5, 88),				/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(7), 4, 0),	/* struct s *next;	*/
 				BTF_MEMBER_ENC(NAME_NTH(1), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(2), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(3), 1, 640),	/* int c;		*/
+				BTF_MEMBER_ENC(NAME_NTH(4), 7, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [7] */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
+		BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
@@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
 	},
 },
 {
-	.descr = "dedup: no int duplicates",
+	.descr = "dedup: no int/float duplicates",
 	.input = {
 		.raw_types = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
 			/* different byte size */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			/* all allowed sizes */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0some other int"),
+		BTF_STR_SEC("\0int\0some other int\0float"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
 			/* different byte size */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			/* all allowed sizes */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0some other int"),
+		BTF_STR_SEC("\0int\0some other int\0float"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
@@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return base_size;
 	case BTF_KIND_INT:
 		return base_size + sizeof(__u32);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 06eb956ff7bb..d94dcead72e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -266,6 +266,7 @@ static int duration = 0;
 		.arr_elem_sz = sizeof(((type *)0)->arr_field[0]),	\
 		.ptr_sz = 8, /* always 8-byte pointer for BPF */	\
 		.enum_sz = sizeof(((type *)0)->enum_field),		\
+		.float_sz = sizeof(((type *)0)->float_field),		\
 	}
 
 #define SIZE_CASE(name) {						\
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..68eb12a287d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+	int i, err, hashmap_fd, max_entries, percpu_map_fd;
+	struct for_each_hash_map_elem *skel;
+	__u64 *percpu_valbuf = NULL;
+	__u32 key, num_cpus, retval;
+	__u64 val;
+
+	skel = for_each_hash_map_elem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+		return;
+
+	hashmap_fd = bpf_map__fd(skel->maps.hashmap);
+	max_entries = bpf_map__max_entries(skel->maps.hashmap);
+	for (i = 0; i < max_entries; i++) {
+		key = i;
+		val = i + 1;
+		err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "map_update"))
+			goto out;
+	}
+
+	num_cpus = bpf_num_possible_cpus();
+	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+		goto out;
+
+	key = 1;
+	for (i = 0; i < num_cpus; i++)
+		percpu_valbuf[i] = i + 1;
+	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	if (!ASSERT_OK(err, "percpu_map_update"))
+		goto out;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+				&retval, &duration);
+	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, retval))
+		goto out;
+
+	ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+	ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+	key = 1;
+	err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+	ASSERT_ERR(err, "hashmap_lookup");
+
+	ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+	ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+	ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+	ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+	ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+	ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+	free(percpu_valbuf);
+	for_each_hash_map_elem__destroy(skel);
+}
+
+static void test_array_map(void)
+{
+	__u32 key, num_cpus, max_entries, retval;
+	int i, arraymap_fd, percpu_map_fd, err;
+	struct for_each_array_map_elem *skel;
+	__u64 *percpu_valbuf = NULL;
+	__u64 val, expected_total;
+
+	skel = for_each_array_map_elem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+		return;
+
+	arraymap_fd = bpf_map__fd(skel->maps.arraymap);
+	expected_total = 0;
+	max_entries = bpf_map__max_entries(skel->maps.arraymap);
+	for (i = 0; i < max_entries; i++) {
+		key = i;
+		val = i + 1;
+		/* skip the last iteration for expected total */
+		if (i != max_entries - 1)
+			expected_total += val;
+		err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "map_update"))
+			goto out;
+	}
+
+	num_cpus = bpf_num_possible_cpus();
+	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+		goto out;
+
+	key = 0;
+	for (i = 0; i < num_cpus; i++)
+		percpu_valbuf[i] = i + 1;
+	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	if (!ASSERT_OK(err, "percpu_map_update"))
+		goto out;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+				&retval, &duration);
+	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, retval))
+		goto out;
+
+	ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+	ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+	free(percpu_valbuf);
+	for_each_array_map_elem__destroy(skel);
+}
+
+void test_for_each(void)
+{
+	if (test__start_subtest("hash_map"))
+		test_hash_map();
+	if (test__start_subtest("array_map"))
+		test_array_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 935a294f049a..131d7f7eeb42 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -2,12 +2,31 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_prog_run_xattr(void)
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
 {
-	const char *file = "./test_pkt_access.o";
-	struct bpf_object *obj;
-	char buf[10];
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int err;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		return;
+
+	CHECK(run_cnt != info.run_cnt, "run_cnt",
+	      "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_xattr(void)
+{
+	struct test_pkt_access *skel;
+	int err, stats_fd = -1;
+	char buf[10] = {};
+	__u64 run_cnt = 0;
+
 	struct bpf_prog_test_run_attr tattr = {
 		.repeat = 1,
 		.data_in = &pkt_v4,
@@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
 		.data_size_out = 5,
 	};
 
-	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &tattr.prog_fd);
-	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
 		return;
 
-	memset(buf, 0, sizeof(buf));
+	skel = test_pkt_access__open_and_load();
+	if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
+		goto cleanup;
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
 
 	err = bpf_prog_test_run_xattr(&tattr);
 	CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
@@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
 	CHECK_ATTR(buf[5] != 0, "overflow",
 	      "BPF_PROG_TEST_RUN ignored size hint\n");
 
+	run_cnt += tattr.repeat;
+	check_run_cnt(tattr.prog_fd, run_cnt);
+
 	tattr.data_out = NULL;
 	tattr.data_size_out = 0;
+	tattr.repeat = 2;
 	errno = 0;
 
 	err = bpf_prog_test_run_xattr(&tattr);
@@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
 	err = bpf_prog_test_run_xattr(&tattr);
 	CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
 
-	bpf_object__close(obj);
+	run_cnt += tattr.repeat;
+	check_run_cnt(tattr.prog_fd, run_cnt);
+
+cleanup:
+	if (skel)
+		test_pkt_access__destroy(skel);
+	if (stats_fd != -1)
+		close(stats_fd);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9ff0412e1fd3..45c82db3c58c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -241,6 +241,48 @@ fail:
 	return -1;
 }
 
+static __u64 socket_cookie(int fd)
+{
+	__u64 cookie;
+	socklen_t cookie_len = sizeof(cookie);
+
+	if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+		  "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+		return 0;
+	return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+			      const char *remote_ip, __u16 remote_port)
+{
+	void *local, *remote;
+	int err;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->local_port = local_port;
+	ctx->remote_port = htons(remote_port);
+
+	if (is_ipv6(local_ip)) {
+		ctx->family = AF_INET6;
+		local = &ctx->local_ip6[0];
+		remote = &ctx->remote_ip6[0];
+	} else {
+		ctx->family = AF_INET;
+		local = &ctx->local_ip4;
+		remote = &ctx->remote_ip4;
+	}
+
+	err = inet_pton(ctx->family, local_ip, local);
+	if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+		return 1;
+
+	err = inet_pton(ctx->family, remote_ip, remote);
+	if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+		return 1;
+
+	return 0;
+}
+
 static int send_byte(int fd)
 {
 	ssize_t n;
@@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
 
 static void run_sk_assign(struct test_sk_lookup *skel,
 			  struct bpf_program *lookup_prog,
-			  const char *listen_ip, const char *connect_ip)
+			  const char *remote_ip, const char *local_ip)
 {
-	int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
-	struct bpf_link *lookup_link;
+	int server_fds[MAX_SERVERS] = { -1 };
+	struct bpf_sk_lookup ctx;
+	__u64 server_cookie;
 	int i, err;
 
-	lookup_link = attach_lookup_prog(lookup_prog);
-	if (!lookup_link)
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.ctx_in = &ctx,
+		.ctx_size_in = sizeof(ctx),
+		.ctx_out = &ctx,
+		.ctx_size_out = sizeof(ctx),
+	);
+
+	if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
 		return;
 
+	ctx.protocol = IPPROTO_TCP;
+
 	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
-		server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+		server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
 		if (server_fds[i] < 0)
 			goto close_servers;
 
@@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
 			goto close_servers;
 	}
 
-	client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
-	if (client_fd < 0)
+	server_cookie = socket_cookie(server_fds[SERVER_B]);
+	if (!server_cookie)
+		return;
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+	if (CHECK(err, "test_run", "failed with error %d\n", errno))
+		goto close_servers;
+
+	if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
 		goto close_servers;
 
-	peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
-	if (CHECK(peer_fd < 0, "accept", "failed\n"))
-		goto close_client;
+	CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+	      "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
 
-	close(peer_fd);
-close_client:
-	close(client_fd);
 close_servers:
 	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
 		if (server_fds[i] != -1)
 			close(server_fds[i]);
 	}
-	bpf_link__destroy(lookup_link);
 }
 
 static void run_sk_assign_v4(struct test_sk_lookup *skel,
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..c26e6bf05e49 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
 					struct bpf_map *inner_map, int family,
 					int sotype)
 {
-	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
-	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	int sock_map = bpf_map__fd(inner_map);
 	int err;
@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
 					struct bpf_map *inner_map, int family,
 					int sotype)
 {
-	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
-	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	int sock_map = bpf_map__fd(inner_map);
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..035c263aab1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <test_progs.h>
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+	struct task_local_storage *skel;
+	int err;
+
+	skel = task_local_storage__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	skel->bss->target_pid = syscall(SYS_gettid);
+
+	err = task_local_storage__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	syscall(SYS_gettid);
+	syscall(SYS_gettid);
+
+	/* 3x syscalls: 1x attach and 2x gettid */
+	ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+	ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+	ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+	task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+	struct task_local_storage_exit_creds *skel;
+	int err;
+
+	skel = task_local_storage_exit_creds__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = task_local_storage_exit_creds__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* trigger at least one exit_creds() */
+	if (CHECK_FAIL(system("ls > /dev/null")))
+		goto out;
+
+	/* sync rcu to make sure exit_creds() is called for "ls" */
+	kern_sync_rcu();
+	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+	task_local_storage_exit_creds__destroy(skel);
+}
+
+static void test_recursion(void)
+{
+	struct task_ls_recursion *skel;
+	int err;
+
+	skel = task_ls_recursion__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = task_ls_recursion__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* trigger sys_enter, make sure it does not cause deadlock */
+	syscall(SYS_gettid);
+
+out:
+	task_ls_recursion__destroy(skel);
+}
+
+void test_task_local_storage(void)
+{
+	if (test__start_subtest("sys_enter_exit"))
+		test_sys_enter_exit();
+	if (test__start_subtest("exit_creds"))
+		test_exit_creds();
+	if (test__start_subtest("recursion"))
+		test_recursion();
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 3ac0c9afc35a..8aaa24a00322 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -213,6 +213,12 @@ struct struct_with_embedded_stuff {
 	struct_in_array_t *v;
 };
 
+struct float_struct {
+	float f;
+	const double *d;
+	volatile long double *ld;
+};
+
 struct root_struct {
 	enum e1 _1;
 	enum e2 _2;
@@ -227,6 +233,7 @@ struct root_struct {
 	union_fwd_t *_12;
 	union_fwd_ptr_t _13;
 	struct struct_with_embedded_stuff _14;
+	struct float_struct _15;
 };
 
 /* ------ END-EXPECTED-OUTPUT ------ */
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9a2850850121..9982eb969048 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -807,6 +807,7 @@ struct core_reloc_size_output {
 	int arr_elem_sz;
 	int ptr_sz;
 	int enum_sz;
+	int float_sz;
 };
 
 struct core_reloc_size {
@@ -816,6 +817,7 @@ struct core_reloc_size {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE = 123 } enum_field;
+	float float_field;
 };
 
 struct core_reloc_size___diff_sz {
@@ -825,6 +827,7 @@ struct core_reloc_size___diff_sz {
 	char arr_field[10];
 	void *ptr_field;
 	enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+	double float_field;
 };
 
 /* Error case of two candidates with the fields (int_field) at the same
@@ -839,6 +842,7 @@ struct core_reloc_size___err_ambiguous1 {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE___1 = 123 } enum_field;
+	float float_field;
 };
 
 struct core_reloc_size___err_ambiguous2 {
@@ -850,6 +854,7 @@ struct core_reloc_size___err_ambiguous2 {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE___2 = 123 } enum_field;
+	float float_field;
 };
 
 /*
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..75e8e1069fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+	int output;
+};
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		 struct callback_ctx *data)
+{
+	data->output += *val;
+	if (*key == 1)
+		return 1; /* stop the iteration */
+	return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		  struct callback_ctx *data)
+{
+	cpu = bpf_get_smp_processor_id();
+	percpu_val = *val;
+	return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+	struct callback_ctx data;
+
+	data.output = 0;
+	bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+	arraymap_output = data.output;
+
+	bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..913dd91aafff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+	struct __sk_buff *ctx;
+	int input;
+	int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		struct callback_ctx *data)
+{
+	struct __sk_buff *skb = data->ctx;
+	__u32 k;
+	__u64 v;
+
+	if (skb) {
+		k = *key;
+		v = *val;
+		if (skb->len == 10000 && k == 10 && v == 10)
+			data->output = 3; /* impossible path */
+		else
+			data->output = 4;
+	} else {
+		data->output = data->input;
+		bpf_map_delete_elem(map, key);
+	}
+
+	return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		  struct callback_ctx *unused)
+{
+	struct callback_ctx data;
+
+	percpu_called++;
+	cpu = bpf_get_smp_processor_id();
+	percpu_key = *key;
+	percpu_val = *val;
+
+	data.ctx = 0;
+	data.input = 100;
+	data.output = 0;
+	bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+	percpu_output = data.output;
+
+	return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+	struct callback_ctx data;
+
+	data.ctx = skb;
+	data.input = 10;
+	data.output = 0;
+	hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+	hashmap_output = data.output;
+
+	percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+						 (void *)0, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..38de0331e6b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS	6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX		10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX		128
+#endif
+
+#define SG_CHAIN	0x01UL
+#define SG_END		0x02UL
+
+struct scatterlist {
+	unsigned long   page_link;
+	unsigned int    offset;
+	unsigned int    length;
+};
+
+#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg)		((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg)	\
+	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+	struct scatterlist sg;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_last(&sg))
+		return NULL;
+
+	sgp++;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_chain(&sg))
+		sgp = sg_chain_ptr(&sg);
+
+	return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+	struct scatterlist *sgp;
+
+	bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+	return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+	       unsigned int out_sgs, unsigned int in_sgs)
+{
+	struct scatterlist *sgp = NULL;
+	__u64 length1 = 0, length2 = 0;
+	unsigned int i, n, len;
+
+	if (config != 0)
+		return 0;
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length1 += len;
+			n++;
+		}
+	}
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length2 += len;
+			n++;
+		}
+	}
+
+	config = 1;
+	result = length2 - length1;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	ptr = bpf_task_storage_get(&enter_id, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	__sync_fetch_and_add(&enter_cnt, 1);
+	*ptr = MAGIC_VALUE + enter_cnt;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	ptr = bpf_task_storage_get(&enter_id, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	__sync_fetch_and_add(&exit_cnt, 1);
+	if (*ptr != MAGIC_VALUE + exit_cnt)
+		__sync_fetch_and_add(&mismatch_cnt, 1);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..81758c0aef99
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, __u64);
+} task_storage SEC(".maps");
+
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+	__u64 *ptr;
+
+	ptr = bpf_task_storage_get(&task_storage, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		__sync_fetch_and_add(&valid_ptr_count, 1);
+	else
+		__sync_fetch_and_add(&null_ptr_count, 1);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..564583dca7c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_a SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_lookup")
+int BPF_PROG(on_lookup)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+
+	bpf_task_storage_delete(&map_a, task);
+	bpf_task_storage_delete(&map_b, task);
+	return 0;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	long *ptr;
+
+	ptr = bpf_task_storage_get(&map_a, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr += 1;
+
+	ptr = bpf_task_storage_get(&map_b, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr += 1;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	ptr = bpf_task_storage_get(&map_a, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr = 200;
+
+	ptr = bpf_task_storage_get(&map_b, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr = 100;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..7b2d576aeea1 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -21,6 +21,7 @@ struct core_reloc_size_output {
 	int arr_elem_sz;
 	int ptr_sz;
 	int enum_sz;
+	int float_sz;
 };
 
 struct core_reloc_size {
@@ -30,6 +31,7 @@ struct core_reloc_size {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE = 123 } enum_field;
+	float float_field;
 };
 
 SEC("raw_tracepoint/sys_enter")
@@ -45,6 +47,7 @@ int test_core_size(void *ctx)
 	out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
 	out->ptr_sz = bpf_core_field_size(in->ptr_field);
 	out->enum_sz = bpf_core_field_size(in->enum_field);
+	out->float_sz = bpf_core_field_size(in->float_field);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 1032b292af5b..ac6f7f205e25 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
 static const __u32 KEY_SERVER_A = SERVER_A;
 static const __u32 KEY_SERVER_B = SERVER_B;
 
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
 static const __u16 DST_PORT = 7007; /* Host byte order */
 static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
 static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
@@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 	if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
 		return SK_DROP;
 
-	/* Narrow loads from remote_port field. Expect non-0 value. */
-	if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
-	    LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+	/* Narrow loads from remote_port field. Expect SRC_PORT. */
+	if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+	    LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
+	    LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
 		return SK_DROP;
-	if (LSW(ctx->remote_port, 0) == 0)
+	if (LSW(ctx->remote_port, 0) != SRC_PORT)
 		return SK_DROP;
 
 	/* Narrow loads from local_port field. Expect DST_PORT. */
@@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 
 	/* Narrow loads from IPv4 fields */
 	if (v4) {
-		/* Expect non-0.0.0.0 in remote_ip4 */
-		if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
-		    LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+		/* Expect SRC_IP4 in remote_ip4 */
+		if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
 			return SK_DROP;
-		if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+		if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
 			return SK_DROP;
 
 		/* Expect DST_IP4 in local_ip4 */
@@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 
 	/* Narrow loads from IPv6 fields */
 	if (!v4) {
-		/* Expect non-:: IP in remote_ip6 */
-		if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
-		    LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
-		    LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
-		    LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
-		    LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
-		    LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
-		    LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
-		    LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+		/* Expect SRC_IP6 in remote_ip6 */
+		if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
 			return SK_DROP;
-		if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
-		    LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
-		    LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
-		    LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+		if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
 			return SK_DROP;
 		/* Expect DST_IP6 in local_ip6 */
 		if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..fa221141e9c1 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -31,13 +31,13 @@ struct {
 static volatile bool test_sockmap; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
 {
 	return skb->len;
 }
 
 SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
 {
 	unsigned int *count;
 	__u32 zero = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..84cd63259554 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
 
 static const int cfg_udp_src = 20000;
 
+#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
+
 #define	UDP_PORT		5555
 #define	MPLS_OVER_UDP_PORT	6635
 #define	ETH_OVER_UDP_PORT	7777
+#define	VXLAN_UDP_PORT		8472
+
+#define	EXTPROTO_VXLAN	0x1
+
+#define	VXLAN_N_VID     (1u << 24)
+#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define	VXLAN_FLAGS     0x8
+#define	VXLAN_VNI       1
 
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 						     MPLS_LS_S_MASK | 0xff);
 
+struct vxlanhdr {
+	__be32 vx_flags;
+	__be32 vx_vni;
+} __attribute__((packed));
+
 struct gre_hdr {
 	__be16 flags;
 	__be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
 struct v4hdr {
 	struct iphdr ip;
 	union l4hdr l4hdr;
-	__u8 pad[16];			/* enough space for L2 header */
+	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 struct v6hdr {
 	struct ipv6hdr ip;
 	union l4hdr l4hdr;
-	__u8 pad[16];			/* enough space for L2 header */
+	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
-				      __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+					__u16 l2_proto, __u16 ext_proto)
 {
 	__u16 udp_dst = UDP_PORT;
 	struct iphdr iph_inner;
 	struct v4hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
+	__u8 *l2_hdr = NULL;
 	int tcp_off;
 	__u64 flags;
 
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 		break;
 	case ETH_P_TEB:
 		l2_len = ETH_HLEN;
-		udp_dst = ETH_OVER_UDP_PORT;
+		if (ext_proto & EXTPROTO_VXLAN) {
+			udp_dst = VXLAN_UDP_PORT;
+			l2_len += sizeof(struct vxlanhdr);
+		} else
+			udp_dst = ETH_OVER_UDP_PORT;
 		break;
 	}
 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 	}
 
 	/* add L2 encap (if specified) */
+	l2_hdr = (__u8 *)&h_outer + olen;
 	switch (l2_proto) {
 	case ETH_P_MPLS_UC:
-		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		*(__u32 *)l2_hdr = mpls_label;
 		break;
 	case ETH_P_TEB:
-		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-				       ETH_HLEN))
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+		if (ext_proto & EXTPROTO_VXLAN) {
+			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+			vxlan_hdr->vx_flags = VXLAN_FLAGS;
+			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+			l2_hdr += sizeof(struct vxlanhdr);
+		}
+
+		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
 			return TC_ACT_SHOT;
+
 		break;
 	}
 	olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 				      __u16 l2_proto)
 {
+	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+					__u16 l2_proto, __u16 ext_proto)
+{
 	__u16 udp_dst = UDP_PORT;
 	struct ipv6hdr iph_inner;
 	struct v6hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
+	__u8 *l2_hdr = NULL;
 	__u16 tot_len;
 	__u64 flags;
 
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 		break;
 	case ETH_P_TEB:
 		l2_len = ETH_HLEN;
-		udp_dst = ETH_OVER_UDP_PORT;
+		if (ext_proto & EXTPROTO_VXLAN) {
+			udp_dst = VXLAN_UDP_PORT;
+			l2_len += sizeof(struct vxlanhdr);
+		} else
+			udp_dst = ETH_OVER_UDP_PORT;
 		break;
 	}
 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
-			  sizeof(h_outer.l4hdr.udp);
+			  sizeof(h_outer.l4hdr.udp) + l2_len;
 		h_outer.l4hdr.udp.check = 0;
 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
 		break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	}
 
 	/* add L2 encap (if specified) */
+	l2_hdr = (__u8 *)&h_outer + olen;
 	switch (l2_proto) {
 	case ETH_P_MPLS_UC:
-		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		*(__u32 *)l2_hdr = mpls_label;
 		break;
 	case ETH_P_TEB:
-		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-				       ETH_HLEN))
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+		if (ext_proto & EXTPROTO_VXLAN) {
+			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+			vxlan_hdr->vx_flags = VXLAN_FLAGS;
+			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+			l2_hdr += sizeof(struct vxlanhdr);
+		}
+
+		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
 			return TC_ACT_SHOT;
 		break;
 	}
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+				      __u16 l2_proto)
+{
+	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
 SEC("encap_ipip_none")
 int __encap_ipip_none(struct __sk_buff *skb)
 {
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return __encap_ipv4(skb, IPPROTO_UDP,
+				    ETH_P_TEB,
+				    EXTPROTO_VXLAN);
+	else
+		return TC_ACT_OK;
+}
+
 SEC("encap_sit_none")
 int __encap_sit_none(struct __sk_buff *skb)
 {
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return __encap_ipv6(skb, IPPROTO_UDP,
+				    ETH_P_TEB,
+				    EXTPROTO_VXLAN);
+	else
+		return TC_ACT_OK;
+}
+
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
 	char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		case ETH_OVER_UDP_PORT:
 			olen += ETH_HLEN;
 			break;
+		case VXLAN_UDP_PORT:
+			olen += ETH_HLEN + sizeof(struct vxlanhdr);
+			break;
 		}
 		break;
 	default:
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 2db3c60e1e61..ac349a5cea7e 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,23 +85,6 @@ make_with_tmpdir() {
 	echo
 }
 
-make_doc_and_clean() {
-	echo -e "\$PWD:    $PWD"
-	echo -e "command: make -s $* doc >/dev/null"
-	RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
-	if [ $? -ne 0 ] ; then
-		ERROR=1
-		printf "FAILURE: Errors or warnings when building documentation\n"
-	fi
-	(
-		if [ $# -ge 1 ] ; then
-			cd ${@: -1}
-		fi
-		make -s doc-clean
-	)
-	echo
-}
-
 echo "Trying to build bpftool"
 echo -e "... through kbuild\n"
 
@@ -162,7 +145,3 @@ make_and_clean
 make_with_tmpdir OUTPUT
 
 make_with_tmpdir O
-
-echo -e "Checking documentation build\n"
-# From tools/bpf/bpftool
-make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..e2394eea4b7f 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -66,4 +66,7 @@
 #define BTF_FUNC_ENC(name, func_proto) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
 
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
 #endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..7eb940a7b2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+for tgt in docs docs-clean; do
+	make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f7c2fd89d01a..e87c8546230e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -152,6 +152,17 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_LT(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act < ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld >= expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	const char *___act = actual;					\
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 427ca00a3217..eefd445b96fc 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -732,7 +732,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 		 * socket is not a valid test. So in this case lets not
 		 * enable kTLS but still run the test.
 		 */
-		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+		if (!txmsg_redir || txmsg_ingress) {
 			err = sockmap_init_ktls(opt->verbose, rx_fd);
 			if (err)
 				return err;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7c76b841b17b..c9dde9b9d987 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -44,8 +44,8 @@ setup() {
 	# clamp route to reserve room for tunnel headers
 	ip -netns "${ns1}" -4 route flush table main
 	ip -netns "${ns1}" -6 route flush table main
-	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
-	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
 
 	sleep 1
 
@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
 	echo "sit"
 	$0 ipv6 sit none 100
 
+	echo "ip4 vxlan"
+	$0 ipv4 vxlan eth 2000
+
+	echo "ip6 vxlan"
+	$0 ipv6 ip6vxlan eth 2000
+
 	for mac in none mpls eth ; do
 		echo "ip gre $mac"
 		$0 ipv4 gre $mac 100
@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
 	targs="encap fou encap-sport auto encap-dport $dport"
 elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
 	ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+	ttype="vxlan"
+	targs="id 1 dstport 8472 udp6zerocsumrx"
 else
 	ttype=$tuntype
 	targs=""
@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
 elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
 	# No support for TEB fou tunnel; expect failure.
 	expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
 	# Share ethernet address between tunnel/veth2 so L2 decap works.
 	ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
 		  awk '/ether/ { print $2 }')
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 58b5a349d3ba..1512092e1e68 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,7 +105,7 @@ struct bpf_test {
 	enum bpf_prog_type prog_type;
 	uint8_t flags;
 	void (*fill_helper)(struct bpf_test *self);
-	uint8_t runs;
+	int runs;
 #define bpf_testdata_struct_t					\
 	struct {						\
 		uint32_t retval, retval_unpriv;			\
@@ -1165,7 +1165,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	run_errs = 0;
 	run_successes = 0;
-	if (!alignment_prevented_execution && fd_prog >= 0) {
+	if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
 		uint32_t expected_val;
 		int i;
 
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 88a7483eaae4..56d4474e2c83 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -71,13 +71,21 @@
 #
 # Run (full output without color-coding):
 #   sudo ./test_xsk.sh
+#
+# Run with verbose output:
+#   sudo ./test_xsk.sh -v
+#
+# Run and dump packet contents:
+#   sudo ./test_xsk.sh -D
 
 . xsk_prereqs.sh
 
-while getopts c flag
+while getopts "cvD" flag
 do
 	case "${flag}" in
 		c) colorconsole=1;;
+		v) verbose=1;;
+		D) dump_pkts=1;;
 	esac
 done
 
@@ -95,13 +103,17 @@ NS1=af_xdp${VETH1_POSTFIX}
 MTU=1500
 
 setup_vethPairs() {
-	echo "setting up ${VETH0}: namespace: ${NS0}"
+	if [[ $verbose -eq 1 ]]; then
+	        echo "setting up ${VETH0}: namespace: ${NS0}"
+	fi
 	ip netns add ${NS1}
 	ip link add ${VETH0} type veth peer name ${VETH1}
 	if [ -f /proc/net/if_inet6 ]; then
 		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
 	fi
-	echo "setting up ${VETH1}: namespace: ${NS1}"
+	if [[ $verbose -eq 1 ]]; then
+	        echo "setting up ${VETH1}: namespace: ${NS1}"
+	fi
 	ip link set ${VETH1} netns ${NS1}
 	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
@@ -125,121 +137,24 @@ echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
 
 validate_veth_spec_file
 
-echo "Spec file created: ${SPECFILE}"
-
-test_status $retval "${TEST_NAME}"
-
-## START TESTS
-
-statusList=()
-
-### TEST 1
-TEST_NAME="XSK KSELFTEST FRAMEWORK"
-
-echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-retval=$?
-if [ $retval -eq 0 ]; then
-	echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
-	vethXDPnative ${VETH0} ${VETH1} ${NS1}
+if [[ $verbose -eq 1 ]]; then
+        echo "Spec file created: ${SPECFILE}"
+	VERBOSE_ARG="-v"
 fi
 
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 2
-TEST_NAME="SKB NOPOLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 3
-TEST_NAME="SKB POLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 4
-TEST_NAME="DRV NOPOLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 5
-TEST_NAME="DRV POLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 6
-TEST_NAME="SKB SOCKET TEARDOWN"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-T")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 7
-TEST_NAME="DRV SOCKET TEARDOWN"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-T")
-execxdpxceiver params
+if [[ $dump_pkts -eq 1 ]]; then
+	DUMP_PKTS_ARG="-D"
+fi
 
-retval=$?
 test_status $retval "${TEST_NAME}"
-statusList+=($retval)
 
-### TEST 8
-TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-B")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+## START TESTS
 
-### TEST 9
-TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+statusList=()
 
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
+TEST_NAME="XSK KSELFTESTS"
 
-params=("-N" "-B")
-execxdpxceiver params
+execxdpxceiver
 
 retval=$?
 test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index fb13ca2d5606..d78627be060f 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -239,6 +239,7 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.runs = -1,
 },
 /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
 {
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 26ae8d0b6ce3..22554894db99 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -17,6 +17,9 @@ KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vm
 KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
 INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
 
 usage()
 {
@@ -146,7 +149,6 @@ update_init_script()
 	local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
 	local init_script="${init_script_dir}/S50-startup"
 	local command="$1"
-	local log_file="$2"
 
 	mount_image
 
@@ -163,11 +165,16 @@ EOF
 	sudo bash -c "cat >${init_script}" <<EOF
 #!/bin/bash
 
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
+
 {
 	cd /root/bpf
 	echo ${command}
 	stdbuf -oL -eL ${command}
-} 2>&1 | tee /root/${log_file}
+	echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
 poweroff -f
 EOF
 
@@ -221,10 +228,12 @@ EOF
 copy_logs()
 {
 	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
-	local log_file="${mount_dir}/root/$1"
+	local log_file="${mount_dir}/root/${LOG_FILE}"
+	local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
 
 	mount_image
 	sudo cp ${log_file} "${OUTPUT_DIR}"
+	sudo cp ${exit_status_file} "${OUTPUT_DIR}"
 	sudo rm -f ${log_file}
 	unmount_image
 }
@@ -263,7 +272,6 @@ main()
 {
 	local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
 	local kernel_checkout=$(realpath "${script_dir}"/../../../../)
-	local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
 	# By default the script searches for the kernel in the checkout directory but
 	# it also obeys environment variables O= and KBUILD_OUTPUT=
 	local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
@@ -347,19 +355,23 @@ main()
 	fi
 
 	update_selftests "${kernel_checkout}" "${make_command}"
-	update_init_script "${command}" "${log_file}"
+	update_init_script "${command}"
 	run_vm "${kernel_bzimage}"
-	copy_logs "${log_file}"
-	echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
+	copy_logs
+	echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
 }
 
 catch()
 {
 	local exit_code=$1
+	local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
 	# This is just a cleanup and the directory may
 	# have already been unmounted. So, don't let this
 	# clobber the error code we intend to return.
 	unmount_image || true
+	if [[ -f "${exit_status_file}" ]]; then
+		exit_code="$(cat ${exit_status_file})"
+	fi
 	exit ${exit_code}
 }
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f4a96d5ff524..8b0f7fdd9003 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -18,12 +18,7 @@
  * These selftests test AF_XDP SKB and Native/DRV modes using veth
  * Virtual Ethernet interfaces.
  *
- * The following tests are run:
- *
- * 1. AF_XDP SKB mode
- *    Generic mode XDP is driver independent, used when the driver does
- *    not have support for XDP. Works on any netdevice using sockets and
- *    generic XDP path. XDP hook from netif_receive_skb().
+ * For each mode, the following tests are run:
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
  *    c. Socket Teardown
@@ -33,19 +28,21 @@
  *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
  *       completion rings on each socket, tx/rx in both directions. Only nopoll
  *       mode is used
+ *    e. Statistics
+ *       Trigger some error conditions and ensure that the appropriate statistics
+ *       are incremented. Within this test, the following statistics are tested:
+ *       i.   rx dropped
+ *            Increase the UMEM frame headroom to a value which results in
+ *            insufficient space in the rx buffer for both the packet and the headroom.
+ *       ii.  tx invalid
+ *            Set the 'len' field of tx descriptors to an invalid value (umem frame
+ *            size + 1).
+ *       iii. rx ring full
+ *            Reduce the size of the RX ring to a fraction of the fill ring size.
+ *       iv.  fill queue empty
+ *            Do not populate the fill queue and then try to receive pkts.
  *
- * 2. AF_XDP DRV/Native mode
- *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
- *    packets before SKB allocation. Provides better performance than SKB. Driver
- *    hook available just after DMA of buffer descriptor.
- *    a. nopoll
- *    b. poll
- *    c. Socket Teardown
- *    d. Bi-directional sockets
- *    - Only copy mode is supported because veth does not currently support
- *      zero-copy mode
- *
- * Total tests: 8
+ * Total tests: 10
  *
  * Flow:
  * -----
@@ -58,7 +55,7 @@
  * - Rx thread verifies if all 10k packets were received and delivered in-order,
  *   and have the right content
  *
- * Enable/disable debug mode:
+ * Enable/disable packet dump mode:
  * --------------------------
  * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
  * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
@@ -98,17 +95,24 @@ typedef __u16 __sum16;
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
-	ksft_test_result_fail
-	    ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
-	ksft_exit_xfail();
+	if (configured_mode == TEST_MODE_UNCONFIGURED) {
+		ksft_exit_fail_msg
+		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+	} else {
+		ksft_test_result_fail
+		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+		ksft_exit_xfail();
+	}
 }
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
-			       "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
-			       opt_bidi ? "Bi-directional Sockets" : ""))
+	(ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\
+			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
+			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
+			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
+			       test_type == TEST_TYPE_STATS ? "Stats" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -270,13 +274,20 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
 static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
 {
 	int ret;
+	struct xsk_umem_config cfg = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.frame_headroom = frame_headroom,
+		.flags = XSK_UMEM__DEFAULT_FLAGS
+	};
 
 	data->umem = calloc(1, sizeof(struct xsk_umem_info));
 	if (!data->umem)
 		exit_with_error(errno);
 
 	ret = xsk_umem__create(&data->umem->umem, buffer, size,
-			       &data->umem->fq, &data->umem->cq, NULL);
+			       &data->umem->fq, &data->umem->cq, &cfg);
 	if (ret)
 		exit_with_error(ret);
 
@@ -308,13 +319,13 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 		exit_with_error(errno);
 
 	ifobject->xsk->umem = ifobject->umem;
-	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg.rx_size = rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
-	cfg.xdp_flags = opt_xdp_flags;
-	cfg.bind_flags = opt_xdp_bind_flags;
+	cfg.xdp_flags = xdp_flags;
+	cfg.bind_flags = xdp_bind_flags;
 
-	if (!opt_bidi) {
+	if (test_type != TEST_TYPE_BIDI) {
 		rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
 		txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
 	} else {
@@ -334,13 +345,8 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"queue", optional_argument, 0, 'q'},
-	{"poll", no_argument, 0, 'p'},
-	{"xdp-skb", no_argument, 0, 'S'},
-	{"xdp-native", no_argument, 0, 'N'},
-	{"copy", no_argument, 0, 'c'},
-	{"tear-down", no_argument, 0, 'T'},
-	{"bidi", optional_argument, 0, 'B'},
-	{"debug", optional_argument, 0, 'D'},
+	{"dump-pkts", optional_argument, 0, 'D'},
+	{"verbose", no_argument, 0, 'v'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
 };
@@ -352,13 +358,8 @@ static void usage(const char *prog)
 	    "  Options:\n"
 	    "  -i, --interface      Use interface\n"
 	    "  -q, --queue=n        Use queue n (default 0)\n"
-	    "  -p, --poll           Use poll syscall\n"
-	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
-	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
-	    "  -c, --copy           Force copy mode\n"
-	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
-	    "  -B, --bidi           Bi-directional sockets test\n"
-	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
+	    "  -D, --dump-pkts      Dump packets L2 - L5\n"
+	    "  -v, --verbose        Verbose output\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
 }
@@ -392,7 +393,7 @@ static void *nsswitchthread(void *args)
 			ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
 					      __func__, ifdict[targs->idx]->ifname);
 		} else {
-			ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname);
+			print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname);
 			targs->retptr = true;
 		}
 	}
@@ -422,7 +423,7 @@ static int validate_interfaces(void)
 			pthread_join(ns_thread, NULL);
 
 			if (targs->retptr)
-				ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
+				print_verbose("NS switched: %s\n", ifdict[i]->nsname);
 
 			free(targs);
 		} else {
@@ -432,7 +433,7 @@ static int validate_interfaces(void)
 				    ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
 				ret = false;
 			} else {
-				ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
+				print_verbose("Interface found: %s\n", ifdict[i]->ifname);
 			}
 		}
 	}
@@ -446,7 +447,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -469,40 +470,26 @@ static void parse_command_line(int argc, char **argv)
 		case 'q':
 			opt_queue = atoi(optarg);
 			break;
-		case 'p':
-			opt_poll = 1;
-			break;
-		case 'S':
-			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
-			opt_xdp_bind_flags |= XDP_COPY;
-			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
-			break;
-		case 'N':
-			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
-			opt_xdp_bind_flags |= XDP_COPY;
-			uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
-			break;
-		case 'c':
-			opt_xdp_bind_flags |= XDP_COPY;
-			break;
-		case 'T':
-			opt_teardown = 1;
-			break;
-		case 'B':
-			opt_bidi = 1;
-			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
 		case 'C':
 			opt_pkt_count = atoi(optarg);
 			break;
+		case 'v':
+			opt_verbose = 1;
+			break;
 		default:
 			usage(basename(argv[0]));
 			ksft_exit_xfail();
 		}
 	}
 
+	if (!opt_pkt_count) {
+		print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
+		opt_pkt_count = DEFAULT_PKT_CNT;
+	}
+
 	if (!validate_interfaces()) {
 		usage(basename(argv[0]));
 		ksft_exit_xfail();
@@ -599,6 +586,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 {
 	u32 idx;
 	unsigned int i;
+	bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
+	u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
 		complete_tx_only(xsk, batch_size);
@@ -607,11 +596,16 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
 
 		tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
-		tx_desc->len = PKT_SIZE;
+		tx_desc->len = len;
 	}
 
 	xsk_ring_prod__submit(&xsk->tx, batch_size);
-	xsk->outstanding_tx += batch_size;
+	if (!tx_invalid_test) {
+		xsk->outstanding_tx += batch_size;
+	} else {
+		if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+			kick_tx(xsk);
+	}
 	*frameptr += batch_size;
 	*frameptr %= num_frames;
 	complete_tx_only(xsk, batch_size);
@@ -654,7 +648,7 @@ static void tx_only_all(struct ifobject *ifobject)
 	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
 		int batch_size = get_batch_size(pkt_cnt);
 
-		if (opt_poll) {
+		if (test_type == TEST_TYPE_POLL) {
 			ret = poll(fds, 1, POLL_TMOUT);
 			if (ret <= 0)
 				continue;
@@ -714,7 +708,7 @@ static void worker_pkt_dump(void)
 		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
 
 		if (payload == EOT) {
-			ksft_print_msg("End-of-transmission frame received\n");
+			print_verbose("End-of-transmission frame received\n");
 			fprintf(stdout, "---------------------------------------\n");
 			break;
 		}
@@ -723,6 +717,48 @@ static void worker_pkt_dump(void)
 	}
 }
 
+static void worker_stats_validate(struct ifobject *ifobject)
+{
+	struct xdp_statistics stats;
+	socklen_t optlen;
+	int err;
+	struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
+							ifdict[!ifobject->ifdict_index]->xsk->xsk :
+							ifobject->xsk->xsk;
+	int fd = xsk_socket__fd(xsk);
+	unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
+
+	sigvar = 0;
+
+	optlen = sizeof(stats);
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+	if (err)
+		return;
+
+	if (optlen == sizeof(struct xdp_statistics)) {
+		switch (stat_test_type) {
+		case STAT_TEST_RX_DROPPED:
+			xsk_stat = stats.rx_dropped;
+			break;
+		case STAT_TEST_TX_INVALID:
+			xsk_stat = stats.tx_invalid_descs;
+			break;
+		case STAT_TEST_RX_FULL:
+			xsk_stat = stats.rx_ring_full;
+			expected_stat -= RX_FULL_RXQSIZE;
+			break;
+		case STAT_TEST_RX_FILL_EMPTY:
+			xsk_stat = stats.rx_fill_ring_empty_descs;
+			break;
+		default:
+			break;
+		}
+
+		if (xsk_stat == expected_stat)
+			sigvar = 1;
+	}
+}
+
 static void worker_pkt_validate(void)
 {
 	u32 payloadseqnum = -2;
@@ -746,7 +782,7 @@ static void worker_pkt_validate(void)
 			}
 
 			if (payloadseqnum == EOT) {
-				ksft_print_msg("End-of-transmission frame received: PASS\n");
+				print_verbose("End-of-transmission frame received: PASS\n");
 				sigvar = 1;
 				break;
 			}
@@ -836,7 +872,7 @@ static void *worker_testapp_validate(void *arg)
 			usleep(USLEEP_MAX);
 		}
 
-		ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname);
+		print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname);
 		for (int i = 0; i < num_frames; i++) {
 			/*send EOT frame */
 			if (i == (num_frames - 1))
@@ -850,7 +886,7 @@ static void *worker_testapp_validate(void *arg)
 			gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
 		}
 
-		ksft_print_msg("Sending %d packets on interface %s\n",
+		print_verbose("Sending %d packets on interface %s\n",
 			       (opt_pkt_count - 1), ifobject->ifname);
 		tx_only_all(ifobject);
 	} else if (ifobject->fv.vector == rx) {
@@ -860,8 +896,9 @@ static void *worker_testapp_validate(void *arg)
 		if (!bidi_pass)
 			thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
 
-		ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname);
-		xsk_populate_fill_ring(ifobject->umem);
+		print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname);
+		if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
+			xsk_populate_fill_ring(ifobject->umem);
 
 		TAILQ_INIT(&head);
 		if (debug_pkt_dump) {
@@ -878,26 +915,32 @@ static void *worker_testapp_validate(void *arg)
 		pthread_mutex_unlock(&sync_mutex);
 
 		while (1) {
-			if (opt_poll) {
+			if (test_type == TEST_TYPE_POLL) {
 				ret = poll(fds, 1, POLL_TMOUT);
 				if (ret <= 0)
 					continue;
 			}
-			rx_pkt(ifobject->xsk, fds);
-			worker_pkt_validate();
+
+			if (test_type != TEST_TYPE_STATS) {
+				rx_pkt(ifobject->xsk, fds);
+				worker_pkt_validate();
+			} else {
+				worker_stats_validate(ifobject);
+			}
 
 			if (sigvar)
 				break;
 		}
 
-		ksft_print_msg("Received %d packets on interface %s\n",
-			       pkt_counter, ifobject->ifname);
+		if (test_type != TEST_TYPE_STATS)
+			print_verbose("Received %d packets on interface %s\n",
+				pkt_counter, ifobject->ifname);
 
-		if (opt_teardown)
-			ksft_print_msg("Destroying socket\n");
+		if (test_type == TEST_TYPE_TEARDOWN)
+			print_verbose("Destroying socket\n");
 	}
 
-	if (!opt_bidi || bidi_pass) {
+	if ((test_type != TEST_TYPE_BIDI) || bidi_pass) {
 		xsk_socket__delete(ifobject->xsk->xsk);
 		(void)xsk_umem__delete(ifobject->umem->umem);
 	}
@@ -907,14 +950,15 @@ static void *worker_testapp_validate(void *arg)
 static void testapp_validate(void)
 {
 	struct timespec max_wait = { 0, 0 };
+	bool bidi = test_type == TEST_TYPE_BIDI;
 
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	if (opt_bidi && bidi_pass) {
+	if ((test_type == TEST_TYPE_BIDI) && bidi_pass) {
 		pthread_init_mutex();
 		if (!switching_notify) {
-			ksft_print_msg("Switching Tx/Rx vectors\n");
+			print_verbose("Switching Tx/Rx vectors\n");
 			switching_notify++;
 		}
 	}
@@ -922,10 +966,10 @@ static void testapp_validate(void)
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	if (!opt_bidi || !bidi_pass) {
+	if (!bidi || !bidi_pass) {
 		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
 			exit_with_error(errno);
-	} else if (opt_bidi && bidi_pass) {
+	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[0]->fv.vector = rx;
 		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
@@ -942,10 +986,10 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	if (!opt_bidi || !bidi_pass) {
+	if (!bidi || !bidi_pass) {
 		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
 			exit_with_error(errno);
-	} else if (opt_bidi && bidi_pass) {
+	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[1]->fv.vector = tx;
 		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
@@ -964,19 +1008,46 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!opt_teardown && !opt_bidi)
+	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS))
 		print_ksft_result();
 }
 
 static void testapp_sockets(void)
 {
-	for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
+	for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER);
+	     i++) {
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
-		ksft_print_msg("Creating socket\n");
+		print_verbose("Creating socket\n");
+		testapp_validate();
+		test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass;
+	}
+
+	print_ksft_result();
+}
+
+static void testapp_stats(void)
+{
+	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+		stat_test_type = i;
+
+		/* reset defaults */
+		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+		switch (stat_test_type) {
+		case STAT_TEST_RX_DROPPED:
+			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+						XDP_PACKET_HEADROOM - 1;
+			break;
+		case STAT_TEST_RX_FULL:
+			rxqsize = RX_FULL_RXQSIZE;
+			break;
+		default:
+			break;
+		}
 		testapp_validate();
-		opt_bidi ? bidi_pass++ : bidi_pass;
 	}
 
 	print_ksft_result();
@@ -1003,6 +1074,104 @@ static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
 	ifdict[1]->src_port = ifaceconfig->dst_port;
 }
 
+static void *nsdisablemodethread(void *args)
+{
+	struct targs *targs = args;
+
+	targs->retptr = false;
+
+	if (switch_namespace(targs->idx)) {
+		targs->retptr = bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags);
+	} else {
+		targs->retptr = errno;
+		print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname);
+	}
+
+	pthread_exit(NULL);
+}
+
+static void disable_xdp_mode(int mode)
+{
+	int err = 0;
+	__u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode;
+	char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv";
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		if (strcmp(ifdict[i]->nsname, "")) {
+			struct targs *targs;
+
+			targs = malloc(sizeof(*targs));
+			memset(targs, 0, sizeof(*targs));
+			if (!targs)
+				exit_with_error(errno);
+
+			targs->idx = i;
+			targs->flags = flags;
+			if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs))
+				exit_with_error(errno);
+
+			pthread_join(ns_thread, NULL);
+			err = targs->retptr;
+			free(targs);
+		} else {
+			err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags);
+		}
+
+		if (err) {
+			print_verbose("Failed to disable %s mode on interface %s\n",
+						mode_str, ifdict[i]->ifname);
+			exit_with_error(err);
+		}
+
+		print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname);
+		configured_mode = mode & XDP_FLAGS_SKB_MODE ? TEST_MODE_DRV : TEST_MODE_SKB;
+	}
+}
+
+static void run_pkt_test(int mode, int type)
+{
+	test_type = type;
+
+	/* reset defaults after potential previous test */
+	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+	pkt_counter = 0;
+	switching_notify = 0;
+	bidi_pass = 0;
+	prev_pkt = -1;
+	ifdict[0]->fv.vector = tx;
+	ifdict[1]->fv.vector = rx;
+	sigvar = 0;
+	stat_test_type = -1;
+	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+	switch (mode) {
+	case (TEST_MODE_SKB):
+		if (configured_mode == TEST_MODE_DRV)
+			disable_xdp_mode(XDP_FLAGS_DRV_MODE);
+		xdp_flags |= XDP_FLAGS_SKB_MODE;
+		break;
+	case (TEST_MODE_DRV):
+		if (configured_mode == TEST_MODE_SKB)
+			disable_xdp_mode(XDP_FLAGS_SKB_MODE);
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+		break;
+	default:
+		break;
+	}
+
+	pthread_init_mutex();
+
+	if (test_type == TEST_TYPE_STATS)
+		testapp_stats();
+	else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
+		testapp_validate();
+	else
+		testapp_sockets();
+
+	pthread_destroy_mutex();
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
@@ -1016,6 +1185,7 @@ int main(int argc, char **argv)
 	const char *IP2 = "192.168.100.161";
 	u16 UDP_DST_PORT = 2020;
 	u16 UDP_SRC_PORT = 2121;
+	int i, j;
 
 	ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
 	memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
@@ -1041,24 +1211,18 @@ int main(int argc, char **argv)
 
 	init_iface_config(ifaceconfig);
 
-	pthread_init_mutex();
+	disable_xdp_mode(XDP_FLAGS_DRV_MODE);
 
-	ksft_set_plan(1);
+	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
-	if (!opt_teardown && !opt_bidi) {
-		testapp_validate();
-	} else if (opt_teardown && opt_bidi) {
-		ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
-		ksft_exit_xfail();
-	} else {
-		testapp_sockets();
+	for (i = 0; i < TEST_MODE_MAX; i++) {
+		for (j = 0; j < TEST_TYPE_MAX; j++)
+			run_pkt_test(i, j);
 	}
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
 
-	pthread_destroy_mutex();
-
 	ksft_exit_pass();
 
 	return 0;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0e9f9b7e61c2..30314ef305c2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -41,33 +41,59 @@
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
 #define NEED_WAKEUP true
+#define DEFAULT_PKT_CNT 10000
+#define RX_FULL_RXQSIZE 32
+
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
 typedef __u32 u32;
 typedef __u16 u16;
 typedef __u8 u8;
 
-enum TESTS {
-	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
-	ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
+enum TEST_MODES {
+	TEST_MODE_UNCONFIGURED = -1,
+	TEST_MODE_SKB,
+	TEST_MODE_DRV,
+	TEST_MODE_MAX
+};
+
+enum TEST_TYPES {
+	TEST_TYPE_NOPOLL,
+	TEST_TYPE_POLL,
+	TEST_TYPE_TEARDOWN,
+	TEST_TYPE_BIDI,
+	TEST_TYPE_STATS,
+	TEST_TYPE_MAX
 };
 
-u8 uut;
-u8 debug_pkt_dump;
-u32 num_frames;
-u8 switching_notify;
-u8 bidi_pass;
+enum STAT_TEST_TYPES {
+	STAT_TEST_RX_DROPPED,
+	STAT_TEST_TX_INVALID,
+	STAT_TEST_RX_FULL,
+	STAT_TEST_RX_FILL_EMPTY,
+	STAT_TEST_TYPE_MAX
+};
+
+static int configured_mode = TEST_MODE_UNCONFIGURED;
+static u8 debug_pkt_dump;
+static u32 num_frames;
+static u8 switching_notify;
+static u8 bidi_pass;
+static int test_type;
 
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
-static int opt_poll;
-static int opt_teardown;
-static int opt_bidi;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 opt_verbose;
+
+static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-static u32 prev_pkt = -1;
+static long prev_pkt = -1;
 static int sigvar;
+static int stat_test_type;
+static u32 rxqsize;
+static u32 frame_headroom;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -137,8 +163,9 @@ pthread_t t0, t1, ns_thread;
 pthread_attr_t attr;
 
 struct targs {
-	bool retptr;
+	u8 retptr;
 	int idx;
+	u32 flags;
 };
 
 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 9d54c4645127..dac1c5f78752 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -82,24 +82,21 @@ clear_configs()
 {
 	if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
 		[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
-			{ echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
-		echo "removing ns $3"
+			{ ip netns exec $3 ip link del $2; }
 		ip netns del $3
 	fi
 	#Once we delete a veth pair node, the entire veth pair is removed,
 	#this is just to be cautious just incase the NS does not exist then
 	#veth node inside NS won't get removed so we explicitly remove it
 	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
-		{ echo "removing link $1"; ip link del $1; }
+		{ ip link del $1; }
 	if [ -f ${SPECFILE} ]; then
-		echo "removing spec file:" ${SPECFILE}
 		rm -f ${SPECFILE}
 	fi
 }
 
 cleanup_exit()
 {
-	echo "cleaning up..."
 	clear_configs $1 $2 $3
 }
 
@@ -108,28 +105,7 @@ validate_ip_utility()
 	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
 }
 
-vethXDPgeneric()
-{
-	ip link set dev $1 xdpdrv off
-	ip netns exec $3 ip link set dev $2 xdpdrv off
-}
-
-vethXDPnative()
-{
-	ip link set dev $1 xdpgeneric off
-	ip netns exec $3 ip link set dev $2 xdpgeneric off
-}
-
 execxdpxceiver()
 {
-	local -a 'paramkeys=("${!'"$1"'[@]}")' copy
-	paramkeysstr=${paramkeys[*]}
-
-	for index in $paramkeysstr;
-		do
-			current=$1"[$index]"
-			copy[$index]=${!current}
-		done
-
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
 }
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..42d44e27802c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -446,6 +446,35 @@ __invalid_nexthop_test()
 	log_test "Unresolved neigh: nexthop does not exist: $desc"
 }
 
+__invalid_nexthop_bucket_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local via_add=$1; shift
+	local trap_name="unresolved_neigh"
+
+	RET=0
+
+	# Check that route to nexthop that does not exist triggers
+	# unresolved_neigh
+	ip nexthop add id 1 via $via_add dev $rp2
+	ip nexthop add id 10 group 1 type resilient buckets 32
+	ip route add $dip nhid 10
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	ip route del $dip nhid 10
+	ip nexthop del id 10
+	ip nexthop del id 1
+	log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
 unresolved_neigh_test()
 {
 	__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +482,8 @@ unresolved_neigh_test()
 	__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
 	__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
 		2001:db8:2::4
+	__invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+	__invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
 }
 
 vrf_without_routes_create()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index ed346da5d3cb..a217f9f6775b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -33,6 +33,7 @@ ALL_TESTS="
 	nexthop_obj_invalid_test
 	nexthop_obj_offload_test
 	nexthop_obj_group_offload_test
+	nexthop_obj_bucket_offload_test
 	nexthop_obj_blackhole_offload_test
 	nexthop_obj_route_offload_test
 	devlink_reload_test
@@ -739,11 +740,28 @@ nexthop_obj_invalid_test()
 
 	ip nexthop add id 1 dev $swp1
 	ip nexthop add id 2 dev $swp1
+	ip nexthop add id 3 via 192.0.2.3 dev $swp1
 	ip nexthop add id 10 group 1/2
 	check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
 
+	ip nexthop add id 10 group 3 type resilient buckets 7
+	check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 129
+	check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+	ip nexthop add id 10 group 1/2 type resilient buckets 32
+	check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 32
+	check_err $? "failed to configure a valid resilient nexthop group"
+	ip nexthop replace id 3 dev $swp1
+	check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
 	log_test "nexthop objects - invalid configurations"
 
+	ip nexthop del id 10
+	ip nexthop del id 3
 	ip nexthop del id 2
 	ip nexthop del id 1
 
@@ -858,6 +876,70 @@ nexthop_obj_group_offload_test()
 	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
 }
 
+nexthop_obj_bucket_offload_test()
+{
+	# Test offload indication of nexthop buckets
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+	# Revalidate nexthop id 1 by changing its configuration
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+	# Revalidate nexthop id 2 by changing its neighbour
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+	log_test "nexthop bucket offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
 nexthop_obj_blackhole_offload_test()
 {
 	# Test offload indication of blackhole nexthop objects
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
deleted file mode 100755
index 0231205a7147..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../../net/forwarding
-
-VXPORT=4789
-
-ALL_TESTS="
-	create_dot1d_and_dot1ad_vxlans
-"
-NUM_NETIFS=2
-source $lib_dir/lib.sh
-
-setup_prepare()
-{
-	swp1=${NETIFS[p1]}
-	swp2=${NETIFS[p2]}
-
-	ip link set dev $swp1 up
-	ip link set dev $swp2 up
-}
-
-cleanup()
-{
-	pre_cleanup
-
-	ip link set dev $swp2 down
-	ip link set dev $swp1 down
-}
-
-create_dot1d_and_dot1ad_vxlans()
-{
-	RET=0
-
-	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
-		vlan_default_pvid 0 mcast_snooping 0
-	ip link set dev br0 up
-
-	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
-		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
-	ip link set dev vx100 up
-
-	ip link set dev $swp1 master br0
-	ip link set dev vx100 master br0
-	bridge vlan add vid 100 dev vx100 pvid untagged
-
-	ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
-	ip link set dev br1 up
-
-	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
-		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
-	ip link set dev vx200 up
-
-	ip link set dev $swp2 master br1
-	ip link set dev vx200 master br1 2>/dev/null
-	check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
-
-	ip link set dev vx200 master br1 2>&1 >/dev/null \
-		| grep -q mlxsw_spectrum
-	check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
-
-	log_test "create 802.1d and 802.1ad VxLANs"
-
-	ip link del dev vx200
-	ip link del dev br1
-	ip link del dev vx100
-	ip link del dev br0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 553cb9fad508..b4dbda706c4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -18,6 +18,7 @@ NUM_NETIFS=2
 
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
 
 switch_create()
 {
@@ -166,7 +167,8 @@ matchall_sample_egress_test()
 	RET=0
 
 	# It is forbidden in mlxsw driver to have matchall with sample action
-	# bound on egress
+	# bound on egress. Spectrum-1 specific restriction
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
 
 	tc qdisc add dev $swp1 clsact
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..57b05f042787
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,627 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+       +---------------------------------+
+# | H1 (vrf)                        |       | H3 (vrf)                        |
+# |    + $h1                        |       |    + $h3_lag                    |
+# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
+# |    |                            |       |    |                            |
+# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
+# +----|----------------------------+       +----|----------------------------+
+#      |                                         |
+# +----|-----------------------------------------|----------------------------+
+# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
+# |    + $rp1                                    + $rp3_lag                   |
+# |                                                                           |
+# |    + $rp2                                    + $rp4_lag                   |
+# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
+# +----|-----------------------------------------|----------------------------+
+#      |                                         |
+# +----|----------------------------+       +----|----------------------------+
+# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
+# |    |                            |       |    |                            |
+# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
+# |    + $h2                        |       |    + $h4_lag                    |
+# | H2 (vrf)                        |       | H4 (vrf)                        |
+# +---------------------------------+       +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_sample_rate_test
+	tc_sample_max_rate_test
+	tc_sample_group_conflict_test
+	tc_sample_md_iif_test
+	tc_sample_md_lag_iif_test
+	tc_sample_md_oif_test
+	tc_sample_md_lag_oif_test
+	tc_sample_md_out_tc_test
+	tc_sample_md_out_tc_occ_test
+	tc_sample_md_latency_test
+	tc_sample_acl_group_conflict_test
+	tc_sample_acl_rate_test
+	tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/28
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+	ip link set dev $h3 down
+	ip link add name ${h3}_bond type bond mode 802.3ad
+	ip link set dev $h3 master ${h3}_bond
+
+	simple_if_init ${h3}_bond 192.0.2.17/28
+
+	ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+	ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+	simple_if_fini ${h3}_bond 192.0.2.17/28
+
+	ip link set dev $h3 nomaster
+	ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+	ip link set dev $h4 down
+	ip link add name ${h4}_bond type bond mode 802.3ad
+	ip link set dev $h4 master ${h4}_bond
+
+	simple_if_init ${h4}_bond 198.51.100.17/28
+
+	ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+	ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+	simple_if_fini ${h4}_bond 198.51.100.17/28
+
+	ip link set dev $h4 nomaster
+	ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	__addr_add_del $rp1 add 192.0.2.2/28
+	tc qdisc add dev $rp1 clsact
+
+	ip link set dev $rp2 up
+	__addr_add_del $rp2 add 198.51.100.2/28
+	tc qdisc add dev $rp2 clsact
+
+	ip link add name ${rp3}_bond type bond mode 802.3ad
+	ip link set dev $rp3 master ${rp3}_bond
+	__addr_add_del ${rp3}_bond add 192.0.2.18/28
+	tc qdisc add dev $rp3 clsact
+	ip link set dev ${rp3}_bond up
+
+	ip link add name ${rp4}_bond type bond mode 802.3ad
+	ip link set dev $rp4 master ${rp4}_bond
+	__addr_add_del ${rp4}_bond add 198.51.100.18/28
+	tc qdisc add dev $rp4 clsact
+	ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+	ip link set dev ${rp4}_bond down
+	tc qdisc del dev $rp4 clsact
+	__addr_add_del ${rp4}_bond del 198.51.100.18/28
+	ip link set dev $rp4 nomaster
+	ip link del dev ${rp4}_bond
+
+	ip link set dev ${rp3}_bond down
+	tc qdisc del dev $rp3 clsact
+	__addr_add_del ${rp3}_bond del 192.0.2.18/28
+	ip link set dev $rp3 nomaster
+	ip link del dev ${rp3}_bond
+
+	tc qdisc del dev $rp2 clsact
+	__addr_add_del $rp2 del 198.51.100.2/28
+	ip link set dev $rp2 down
+
+	tc qdisc del dev $rp1 clsact
+	__addr_add_del $rp1 del 192.0.2.2/28
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+	h3=${NETIFS[p5]}
+	rp3=${NETIFS[p6]}
+	h4=${NETIFS[p7]}
+	rp4=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	h4_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	rm -f $CAPTURE_FILE
+
+	router_destroy
+	h4_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+psample_capture_start()
+{
+	rm -f $CAPTURE_FILE
+
+	psample &> $CAPTURE_FILE &
+
+	sleep 1
+}
+
+psample_capture_stop()
+{
+	{ kill %% && wait %%; } 2>/dev/null
+}
+
+__tc_sample_rate_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local pkts pct
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B $dip -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 100) / 100))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	log_test "tc sample rate ($desc)"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+	__tc_sample_rate_test "forward" 198.51.100.1
+	__tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample maximum rate"
+}
+
+tc_sample_group_conflict_test()
+{
+	RET=0
+
+	# Test that two sampling rules cannot be configured on the same port
+	# with different groups.
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample group conflict test"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+	local rp1_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample iif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+	local rp3_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample lag iif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+	local rp2_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample oif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+	local rp4_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample lag oif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+	RET=0
+
+	# Output traffic class is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# By default, all the packets should go to the same traffic class (0).
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 0 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (0)"
+
+	# Map all priorities to highest traffic class (7) and check reported
+	# out-tc.
+	tc qdisc replace dev $rp2 root handle 1: \
+		prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 7 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (7)"
+
+	log_test "tc sample out-tc"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+	local backlog pct occ
+
+	RET=0
+
+	# Output traffic class occupancy is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# Configure a shaper on egress to create congestion.
+	tc qdisc replace dev $rp2 root handle 1: \
+		tbf rate 1Mbit burst 256k limit 1M
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+	# Allow congestion to reach steady state.
+	sleep 10
+
+	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+	# Kill mausezahn.
+	{ kill %% && wait %%; } 2>/dev/null
+
+	psample_capture_stop
+
+	# Record last congestion sample.
+	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+		cut -d ' ' -f 16)
+
+	pct=$((100 * (occ - backlog) / backlog))
+	(( -1 <= pct && pct <= 1))
+	check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+	log_test "tc sample out-tc-occ"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+	RET=0
+
+	# Egress sampling not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have latency attribute"
+
+	log_test "tc sample latency"
+
+	tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+	RET=0
+
+	# Test that two flower sampling rules cannot be configured on the same
+	# port with different groups.
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule with same group"
+
+	tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample (w/ flower) group conflict test"
+
+	tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+	local bind=$1; shift
+	local port=$1; shift
+	local pkts pct
+
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 100) / 100))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	# Setup a filter that should not match any packet and make sure packets
+	# are not sampled.
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "group 1 " $CAPTURE_FILE
+	check_fail $? "Sampled packets when should not"
+
+	log_test "tc sample (w/ flower) rate ($bind)"
+
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+	__tc_sample_acl_rate_test ingress $rp1
+	__tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24 - 1)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..ba75c81cda91 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,14 +11,33 @@ ALL_TESTS="
 	nexthop_single_add_err_test
 	nexthop_group_add_test
 	nexthop_group_add_err_test
+	nexthop_res_group_add_test
+	nexthop_res_group_add_err_test
 	nexthop_group_replace_test
 	nexthop_group_replace_err_test
+	nexthop_res_group_replace_test
+	nexthop_res_group_replace_err_test
+	nexthop_res_group_idle_timer_test
+	nexthop_res_group_idle_timer_del_test
+	nexthop_res_group_increase_idle_timer_test
+	nexthop_res_group_decrease_idle_timer_test
+	nexthop_res_group_unbalanced_timer_test
+	nexthop_res_group_unbalanced_timer_del_test
+	nexthop_res_group_no_unbalanced_timer_test
+	nexthop_res_group_short_unbalanced_timer_test
+	nexthop_res_group_increase_unbalanced_timer_test
+	nexthop_res_group_decrease_unbalanced_timer_test
+	nexthop_res_group_force_migrate_busy_test
 	nexthop_single_replace_test
 	nexthop_single_replace_err_test
 	nexthop_single_in_group_replace_test
 	nexthop_single_in_group_replace_err_test
+	nexthop_single_in_res_group_replace_test
+	nexthop_single_in_res_group_replace_err_test
 	nexthop_single_in_group_delete_test
 	nexthop_single_in_group_delete_err_test
+	nexthop_single_in_res_group_delete_test
+	nexthop_single_in_res_group_delete_err_test
 	nexthop_replay_test
 	nexthop_replay_err_test
 "
@@ -27,6 +46,7 @@ DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
 DEVLINK_DEV=netdevsim/${DEV}
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
 NUM_NETIFS=0
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
@@ -44,6 +64,28 @@ nexthop_check()
 	return 0
 }
 
+nexthop_bucket_nhid_count_check()
+{
+	local group_id=$1; shift
+	local expected
+	local count
+	local nhid
+	local ret
+
+	while (($# > 0)); do
+		nhid=$1; shift
+		expected=$1; shift
+
+		count=$($IP nexthop bucket show id $group_id nhid $nhid |
+			grep "trap" | wc -l)
+		if ((expected != count)); then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
 nexthop_resource_check()
 {
 	local expected_occ=$1; shift
@@ -159,6 +201,71 @@ nexthop_group_add_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_res_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+	nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 7
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Resilient nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
 nexthop_group_replace_test()
 {
 	RET=0
@@ -206,6 +313,411 @@ nexthop_group_replace_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	$IP nexthop replace id 10 group 1/2/3 type resilient
+	nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+	$IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 3
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Resilient nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+	local count=$1; shift
+	local index
+
+	for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+		       jq '.[].bucket.index' | head -n ${count:--0})
+	do
+		echo $group_id $index \
+			> $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+	done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+
+	$IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 6
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 \
+	    type resilient buckets 8 idle_timer 6
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4  3 0
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 4
+
+	# Deletion prompts group replacement. Check that the bucket timers
+	# are kept.
+	$IP nexthop delete id 3
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to still be unbalanced"
+
+	sleep 4
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+	sleep 4
+
+	# 6 seconds, past the new timer, before the old timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer decrease"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,100/2,100/3,1 \
+	    type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+	__nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in 1 2; do
+		sleep 4
+		nexthop_bucket_nhid_count_check 10  1 4  2 4
+		check_err $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	# Check that NH delete does not reset unbalanced time.
+	sleep 4
+	$IP nexthop delete id 3
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "1: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "2: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in $(seq 3); do
+		sleep 60
+		nexthop_bucket_nhid_count_check 10 2 6
+		check_fail $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	log_test "Buckets never force-migrated without unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120 unbalanced_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 5
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced < idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+	__nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	$IP nexthop replace id 10 group 2 type resilient
+	nexthop_bucket_nhid_count_check 10 2 8
+	check_err $? "All buckets expected to have migrated"
+
+	log_test "Busy buckets force-migrated when NH removed"
+
+	$IP nexthop flush &> /dev/null
+}
+
 nexthop_single_replace_test()
 {
 	RET=0
@@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_single_in_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
 nexthop_single_in_group_delete_test()
 {
 	RET=0
@@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_single_in_res_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 2 4
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop del id 1
+
+	# We failed to replace the two nexthop buckets that were originally
+	# assigned to nhid 1.
+	nexthop_bucket_nhid_count_check 10  2 2  3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 8
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
 nexthop_replay_test()
 {
 	RET=0
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..ee10b1a8933c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	psample_enable_test
+	psample_group_num_test
+	psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+	rm -f $CAPTURE_FILE
+
+	timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to enable sampling when should not"
+
+	echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling enablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+		check_err 1 "Failed to capture sampled packets"
+	fi
+
+	echo 0 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to disable sampling when should not"
+
+	echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling disablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+		check_err 1 "Captured sampled packets when should not"
+	fi
+
+	log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+	RET=0
+
+	echo 1234 > $PSAMPLE_DIR/group_num
+	echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong group number"
+
+	# New group number should only be used after disable / enable.
+	echo 4321 > $PSAMPLE_DIR/group_num
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_fail $? "Group number changed while sampling is active"
+
+	echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_err $? "Group number did not change after restarting sampling"
+
+	log_test "psample group number"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+
+	echo 1234 > $PSAMPLE_DIR/in_ifindex
+	echo 4321 > $PSAMPLE_DIR/out_ifindex
+	psample_capture
+
+	grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong in-ifindex"
+
+	grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-ifindex"
+
+	echo 5 > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc 5" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-tc"
+
+	echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc when should not"
+
+	echo 1 > $PSAMPLE_DIR/out_tc
+	echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+	echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+	echo 10000 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with latency when should"
+
+	echo 0 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with latency when should not"
+
+	log_test "psample metadata"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	rm -f $CAPTURE_FILE
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d98fb85e201c..56dd0c6f2e96 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,10 +19,39 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+	ipv4_fcnal
+	ipv4_grp_fcnal
+	ipv4_res_grp_fcnal
+	ipv4_withv6_fcnal
+	ipv4_fcnal_runtime
+	ipv4_large_grp
+	ipv4_large_res_grp
+	ipv4_compat_mode
+	ipv4_fdb_grp_fcnal
+	ipv4_torture
+	ipv4_res_torture
+"
+
+IPV6_TESTS="
+	ipv6_fcnal
+	ipv6_grp_fcnal
+	ipv6_res_grp_fcnal
+	ipv6_fcnal_runtime
+	ipv6_large_grp
+	ipv6_large_res_grp
+	ipv6_compat_mode
+	ipv6_fdb_grp_fcnal
+	ipv6_torture
+	ipv6_res_torture
+"
+
+ALL_TESTS="
+	basic
+	basic_res
+	${IPV4_TESTS}
+	${IPV6_TESTS}
+"
 TESTS="${ALL_TESTS}"
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -232,6 +261,19 @@ check_nexthop()
 	check_output "${out}" "${expected}"
 }
 
+check_nexthop_bucket()
+{
+	local nharg="$1"
+	local expected="$2"
+	local out
+
+	# remove the idle time since we cannot match it
+	out=$($IP nexthop bucket ${nharg} \
+		| sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
 check_route()
 {
 	local pfx="$1"
@@ -308,6 +350,25 @@ check_large_grp()
 	log_test $? 0 "Dump large (x$ecmp) ecmp groups"
 }
 
+check_large_res_grp()
+{
+	local ipv=$1
+	local buckets=$2
+	local ipstr=""
+
+	if [ $ipv -eq 4 ]; then
+		ipstr="172.16.1.2"
+	else
+		ipstr="2001:db8:91::2"
+	fi
+
+	# create a resilient group with $buckets buckets and dump them
+	run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+	run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+	run_cmd "$IP nexthop bucket list"
+	log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1
@@ -344,6 +405,15 @@ check_nexthop_fdb_support()
 	fi
 }
 
+check_nexthop_res_support()
+{
+	$IP nexthop help 2>&1 | grep -q resilient
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+		return $ksft_skip
+	fi
+}
+
 ipv6_fdb_grp_fcnal()
 {
 	local rc
@@ -666,6 +736,70 @@ ipv6_grp_fcnal()
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
 }
 
+ipv6_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
 ipv6_fcnal_runtime()
 {
 	local rc
@@ -824,6 +958,22 @@ ipv6_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv6_large_res_grp()
+{
+	echo
+	echo "IPv6 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 6 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ipv6_del_add_loop1()
 {
 	while :; do
@@ -874,11 +1024,67 @@ ipv6_torture()
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv6 torture test"
 }
 
+ipv6_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv6 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+	run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+	ipv6_del_add_loop1 &
+	pid1=$!
+	ipv6_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn -6 veth1 \
+			    -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+			    -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
 
 ipv4_fcnal()
 {
@@ -1038,6 +1244,70 @@ ipv4_grp_fcnal()
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
 }
 
+ipv4_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
 ipv4_withv6_fcnal()
 {
 	local lladdr
@@ -1259,6 +1529,22 @@ ipv4_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv4_large_res_grp()
+{
+	echo
+	echo "IPv4 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 4 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 sysctl_nexthop_compat_mode_check()
 {
 	local sysctlname="net.ipv4.nexthop_compat_mode"
@@ -1476,11 +1762,68 @@ ipv4_torture()
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv4 torture test"
 }
 
+ipv4_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv4 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 172.16.101.1 nhid 102"
+	run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+	ipv4_del_add_loop1 &
+	pid1=$!
+	ipv4_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 \
+				-B 172.16.101.2 -A 172.16.1.1 -c 0 \
+				-t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
 basic()
 {
 	echo
@@ -1592,6 +1935,204 @@ basic()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+check_nexthop_buckets_balance()
+{
+	local nharg=$1; shift
+	local ret
+
+	while (($# > 0)); do
+		local selector=$1; shift
+		local condition=$1; shift
+		local count
+
+		count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+		(( $count $condition ))
+		ret=$?
+		if ((ret != 0)); then
+			return $ret
+		fi
+	done
+
+	return 0
+}
+
+basic_res()
+{
+	echo
+	echo "Basic resilient nexthop group functional tests"
+	echo "----------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+
+	#
+	# resilient nexthop group addition
+	#
+
+	run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+	log_test $? 0 "Add a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop get id 101"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop add id 102 group 1 type resilient
+			buckets 4 idle_timer 100 unbalanced_timer 5"
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" \
+		"id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with non-default parameters"
+
+	run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+	log_test $? 2 "Add a nexthop group with 0 buckets"
+
+	#
+	# resilient nexthop group replacement
+	#
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient
+			buckets 8 idle_timer 240 unbalanced_timer 80"
+	log_test $? 0 "Replace nexthop group parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+	log_test $? 0 "Replace idle timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+	log_test $? 0 "Replace unbalanced timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+	log_test $? 0 "Replace with no parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1"
+	log_test $? 2 "Replace nexthop group type - implicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+	log_test $? 2 "Replace nexthop group type - explicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+	log_test $? 2 "Replace number of nexthop buckets"
+
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+	#
+	# resilient nexthop buckets dump
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+	run_cmd "$IP nexthop add id 201 group 1/2"
+
+	check_nexthop_bucket "" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets"
+
+	check_nexthop_bucket "list id 101" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets in a group"
+
+	(( $($IP -j nexthop bucket list id 101 |
+	     jq '[.[] | select(.bucket.idle_time > 0 and
+	                       .bucket.idle_time < 2)] | length') == 4 ))
+	log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+	check_nexthop_bucket "list dev veth1" \
+		"id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+	check_nexthop_bucket "list nhid 2" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+	run_cmd "$IP nexthop bucket list id 111"
+	log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+	run_cmd "$IP nexthop bucket list id 201"
+	log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+	run_cmd "$IP nexthop bucket list dev bla"
+	log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+	run_cmd "$IP nexthop bucket list groups"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+	run_cmd "$IP nexthop bucket list fdb"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+	#
+	# resilient nexthop buckets get requests
+	#
+
+	check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+	log_test $? 0 "Get a valid nexthop bucket"
+
+	run_cmd "$IP nexthop bucket get id 101 index 999"
+	log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+	run_cmd "$IP nexthop bucket get id 201 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+	run_cmd "$IP nexthop bucket get id 999 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+	#
+	# tests for bucket migration
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101
+			group 1/2 type resilient buckets 10
+			idle_timer 1 unbalanced_timer 20"
+
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Initial bucket allocation"
+
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 4" \
+				      "nhid 2" "== 6"
+	log_test $? 0 "Bucket allocation after replace"
+
+	# Check that increase in idle timer does not make buckets appear busy.
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient
+			idle_timer 10"
+	run_cmd "$IP nexthop replace id 101
+			group 1/2 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Buckets migrated after idle timer change"
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ################################################################################
 # usage
 
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..5148d97a5df8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                               +----------------------+
+# | H1 (vrf)           |                               |             H2 (vrf) |
+# |    + h1.10         |                               |  + h2.20             |
+# |    | 192.0.2.1/28  |                               |  | 192.0.2.2/28      |
+# |    |               |                               |  |                   |
+# |    + $h1           |                               |  + $h2               |
+# |    |               |                               |  |                   |
+# +----|---------------+                               +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|--------------------+
+# | SW |                                                  |                    |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | |  + $swp1         BR1 (802.1ad)   | | BR2 (802.1d)   + $swp2            | |
+# | |    vid 100 pvid untagged         | |                |                  | |
+# | |                                  | |                + $swp2.20         | |
+# | |                                  | |                                   | |
+# | |  + vx100 (vxlan)                 | |  + vx200 (vxlan)                  | |
+# | |    local 192.0.2.17              | |    local 192.0.2.17               | |
+# | |    remote 192.0.2.34             | |    remote 192.0.2.50              | |
+# | |    id 1000 dstport $VXPORT       | |    id 2000 dstport $VXPORT        | |
+# | |    vid 100 pvid untagged         | |                                   | |
+# | +--------------------------------- + +-----------------------------------+ |
+# |                                                                            |
+# |  192.0.2.32/28 via 192.0.2.18                                              |
+# |  192.0.2.48/28 via 192.0.2.18                                              |
+# |                                                                            |
+# |    + $rp1                                                                  |
+# |    | 192.0.2.17/28                                                         |
+# +----|-----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR3 (802.1ad) | |   | |                  BR3 (802.1d) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx200 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 2000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |                               | |
+# | |                               | |   | |  + w1.20                      | |
+# | |                               | |   | |  |                            | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  |                            | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |                            | |   | |  |                            | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.10                      | |   | |  + w2.20                      | |
+# | |    192.0.2.3/28               | |   | |    192.0.2.4/28               | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	#### BR1 ####
+	ip link add name br1 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	#### BR2 ####
+	ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br2 address $(mac_get $swp2)
+	ip link set dev br2 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	#### VX100 ####
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	#### VX200 ####
+	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx200 up
+
+	ip link set dev vx200 master br2
+
+	ip link set dev $swp2 up
+	ip link add name $swp2.20 link $swp2 type vlan id 20
+	ip link set dev $swp2.20 master br2
+	ip link set dev $swp2.20 up
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	ip link set dev vx200 nomaster
+	ip link set dev vx200 down
+	ip link del dev vx200
+
+	ip link del dev $swp2.20
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br2 down
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local vxlan_name=$1; shift
+	local vxlan_id=$1; shift
+	local vlan_id=$1; shift
+	local host_addr=$1; shift
+	local nh_addr=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br3 type bridge vlan_filtering 0
+	ip link set dev br3 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br3
+	ip link set dev w1 up
+
+	ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev $vxlan_name up
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev $vxlan_name master br3
+	tc qdisc add dev $vxlan_name clsact
+
+	simple_if_init w2
+	vlan_create w2 $vlan_id vw2 $host_addr/28
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+	      192.0.2.33
+
+	in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+	      192.0.2.49
+
+	in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+	in_ns ns2 ip link set dev w1.20 master br3
+	in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+	ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..e134a5f529c9 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
 	ip -n $ns link set dev dummy1 up
 
 	# Add two routes with the same key and different prefix length and
-	# make sure both are in hardware. It can be verfied that both are
+	# make sure both are in hardware. It can be verified that both are
 	# sharing the same leaf by checking the /proc/net/fib_trie
 	ip -n $ns route add 192.0.2.0/24 dev dummy1
 	ip -n $ns route add 192.0.2.0/25 dev dummy1
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..088b65e64d66
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+	multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 0
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for ((i=0; i < 16384; ++i)); do
+		ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+	done
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index be71012b8fc5..05c05e02bade 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -353,6 +353,11 @@ wait_for_offload()
 	"$@" | grep -q offload
 }
 
+wait_for_trap()
+{
+	"$@" | grep -q trap
+}
+
 until_counter_is()
 {
 	local expr=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4898dd4118f1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip nexthop del id 103
+	ip nexthop del id 101
+	ip nexthop del id 102
+	ip nexthop del id 106
+	ip nexthop del id 104
+	ip nexthop del id 105
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip nexthop del id 201
+	ip nexthop del id 202
+	ip nexthop del id 204
+	ip nexthop del id 205
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+	ip nexthop add id 101 via 169.254.2.22 dev $rp12
+	ip nexthop add id 102 via 169.254.3.23 dev $rp13
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+	ip nexthop add id 104 via fe80:2::22 dev $rp12
+	ip nexthop add id 105 via fe80:3::23 dev $rp13
+	ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+	ip nexthop add id 201 via 169.254.2.12 dev $rp22
+	ip nexthop add id 202 via 169.254.3.13 dev $rp23
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+	ip nexthop add id 204 via fe80:2::12 dev $rp22
+	ip nexthop add id 205 via fe80:3::13 dev $rp23
+	ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	# Restore settings.
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+	# Without an idle timer, weight replacement should happen immediately.
+	log_info "Running multipath tests without an idle timer"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With an idle timer, weight replacement should not happen, so the
+	# expected ratio should always be the initial one (1:1).
+	log_info "Running multipath tests with an idle timer of 120 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With a short idle timer and enough idle time, weight replacement
+	# should happen.
+	log_info "Running multipath tests with an idle timer of 5 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+	log_info "Running IPv4 multipath tests"
+	sleep 10
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	sleep 10
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Nexthop objects not supported; skipping tests"
+	exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index ad32240fbfda..fe990d8696a9 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -279,14 +279,19 @@ do_transfer()
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
 			counter=1
-			sleep 1
-
-			while [ $counter -le $rm_nr_ns1 ]
-			do
-				ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
+			if [ ${#dump[@]} -gt 0 ]; then
+				id=${dump[1]}
 				sleep 1
-				let counter+=1
-			done
+
+				while [ $counter -le $rm_nr_ns1 ]
+				do
+					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
+					sleep 1
+					let counter+=1
+					let id+=1
+				done
+			fi
 		else
 			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
@@ -313,14 +318,19 @@ do_transfer()
 		let rm_nr_ns2=-addr_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
 			counter=1
-			sleep 1
-
-			while [ $counter -le $rm_nr_ns2 ]
-			do
-				ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
+			if [ ${#dump[@]} -gt 0 ]; then
+				id=${dump[1]}
 				sleep 1
-				let counter+=1
-			done
+
+				while [ $counter -le $rm_nr_ns2 ]
+				do
+					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
+					sleep 1
+					let counter+=1
+					let id+=1
+				done
+			fi
 		else
 			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
@@ -605,11 +615,22 @@ chk_rm_nr()
 {
 	local rm_addr_nr=$1
 	local rm_subflow_nr=$2
+	local invert=${3:-""}
 	local count
 	local dump_stats
+	local addr_ns
+	local subflow_ns
+
+	if [ -z $invert ]; then
+		addr_ns=$ns1
+		subflow_ns=$ns2
+	elif [ $invert = "invert" ]; then
+		addr_ns=$ns2
+		subflow_ns=$ns1
+	fi
 
 	printf "%-39s %s" " " "rm "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+	count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_addr_nr" ]; then
 		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
@@ -620,7 +641,7 @@ chk_rm_nr()
 	fi
 
 	echo -n " - sf    "
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+	count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_subflow_nr" ]; then
 		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
@@ -828,7 +849,7 @@ remove_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address" 1 1 1
 	chk_add_nr 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal, remove
 	reset
@@ -864,6 +885,29 @@ remove_tests()
 	chk_join_nr "flush subflows and signal" 3 3 3
 	chk_add_nr 1 1
 	chk_rm_nr 2 2
+
+	# subflows flush
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+	chk_join_nr "flush subflows" 3 3 3
+	chk_rm_nr 3 3
+
+	# addresses flush
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+	chk_join_nr "flush addresses" 3 3 3
+	chk_add_nr 3 3
+	chk_rm_nr 3 3 invert
 }
 
 add_tests()
@@ -940,7 +984,7 @@ ipv6_tests()
 	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
 	chk_join_nr "remove single address IPv6" 1 1 1
 	chk_add_nr 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal IPv6, remove
 	reset
@@ -1083,7 +1127,7 @@ add_addr_ports_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address with port" 1 1 1
 	chk_add_nr 1 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal with port, remove
 	reset
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 431296c0f91c..427d94816f2d 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -371,6 +371,88 @@ else
 	ip netns exec nsr1 nft list ruleset
 fi
 
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net nsr1 link add name br0 type bridge
+ip -net nsr1 addr flush dev veth0
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set veth0 master br0
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
+ip -net nsr1 addr add dead:1::1/64 dev br0
+ip -net nsr1 link set up dev br0
+
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" counter masquerade
+   }
+}
+EOF
+
+if test_tcp_forwarding_nat ns1 ns2; then
+	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
+else
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set down dev veth0
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set up dev veth0.10
+ip -net nsr1 link set veth0.10 master br0
+
+ip -net ns1 addr flush dev eth0
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net ns1 link set eth0 up
+ip -net ns1 link set eth0.10 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
+
+if test_tcp_forwarding_nat ns1 ns2; then
+	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
+else
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set veth0 down
+ip -net nsr1 link set veth0.10 down
+ip -net nsr1 link delete veth0.10 type vlan
+ip -net nsr1 link delete br0 type bridge
+ip -net ns1 addr flush dev eth0.10
+ip -net ns1 link set eth0.10 down
+ip -net ns1 link set eth0 down
+ip -net ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net ns1 link set eth0 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0
+ip -net ns1 route add default via dead:1::1
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net nsr1 link set up dev veth0
+
 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
 SPI1=$RANDOM