1025 files changed, 37712 insertions, 15108 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..ee0569a040c6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4945,8 +4945,6 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - sbni= [NET] Granch SBNI12 leased line adapter - sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index f84e31348d80..fb547e26c676 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -104,9 +104,18 @@ properties: maximum: 32 maxItems: 1 + power-domains: + description: + Power domain provider node and an args specifier containing + the can device id value. + maxItems: 1 + can-transceiver: $ref: can-transceiver.yaml# + phys: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml new file mode 100644 index 000000000000..eca41443fcce --- /dev/null +++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/fsl,fec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Fast Ethernet Controller (FEC) + +maintainers: + - Joakim Zhang <qiangqing.zhang@nxp.com> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - fsl,imx25-fec + - fsl,imx27-fec + - fsl,imx28-fec + - fsl,imx6q-fec + - fsl,mvf600-fec + - items: + - enum: + - fsl,imx53-fec + - fsl,imx6sl-fec + - const: fsl,imx25-fec + - items: + - enum: + - fsl,imx35-fec + - fsl,imx51-fec + - const: fsl,imx27-fec + - items: + - enum: + - fsl,imx6ul-fec + - fsl,imx6sx-fec + - const: fsl,imx6q-fec + - items: + - enum: + - fsl,imx7d-fec + - const: fsl,imx6sx-fec + - items: + - const: fsl,imx8mq-fec + - const: fsl,imx6sx-fec + - items: + - enum: + - fsl,imx8mm-fec + - fsl,imx8mn-fec + - fsl,imx8mp-fec + - const: fsl,imx8mq-fec + - const: fsl,imx6sx-fec + - items: + - const: fsl,imx8qm-fec + - const: fsl,imx6sx-fec + - items: + - enum: + - fsl,imx8qxp-fec + - const: fsl,imx8qm-fec + - const: fsl,imx6sx-fec + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 4 + + interrupt-names: + oneOf: + - items: + - const: int0 + - items: + - const: int0 + - const: pps + - items: + - const: int0 + - const: int1 + - const: int2 + - items: + - const: int0 + - const: int1 + - const: int2 + - const: pps + + clocks: + minItems: 2 + maxItems: 5 + description: + The "ipg", for MAC ipg_clk_s, ipg_clk_mac_s that are for register accessing. + The "ahb", for MAC ipg_clk, ipg_clk_mac that are bus clock. + The "ptp"(option), for IEEE1588 timer clock that requires the clock. + The "enet_clk_ref"(option), for MAC transmit/receiver reference clock like + RGMII TXC clock or RMII reference clock. It depends on board design, + the clock is required if RGMII TXC and RMII reference clock source from + SOC internal PLL. + The "enet_out"(option), output clock for external device, like supply clock + for PHY. The clock is required if PHY clock source from SOC. + The "enet_2x_txclk"(option), for RGMII sampling clock which fixed at 250Mhz. 
+ The clock is required if SoC RGMII enable clock delay. + + clock-names: + minItems: 2 + maxItems: 5 + items: + enum: + - ipg + - ahb + - ptp + - enet_clk_ref + - enet_out + - enet_2x_txclk + + phy-mode: true + + phy-handle: true + + fixed-link: true + + local-mac-address: true + + mac-address: true + + tx-internal-delay-ps: + enum: [0, 2000] + + rx-internal-delay-ps: + enum: [0, 2000] + + phy-supply: + description: + Regulator that powers the Ethernet PHY. + + fsl,num-tx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the tx queue number, otherwise set tx queue number to 1. + enum: [1, 2, 3] + + fsl,num-rx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the rx queue number, otherwise set rx queue number to 1. + enum: [1, 2, 3] + + fsl,magic-packet: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present, indicates that the hardware supports waking up via magic packet. + + fsl,err006687-workaround-present: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present indicates that the system has the hardware workaround for + ERR006687 applied and does not need a software workaround. + + fsl,stop-mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Register bits of stop mode control, the format is <&gpr req_gpr req_bit>. + gpr is the phandle to general purpose register node. + req_gpr is the gpr register offset for ENET stop request. + req_bit is the gpr bit offset for ENET stop request. + + mdio: + type: object + description: + Specifies the mdio bus in the FEC, used as a container for phy nodes. + + # Deprecated optional properties: + # To avoid these, create a phy node according to ethernet-phy.yaml in the same + # directory, and point the FEC's "phy-handle" property to it. Then use + # the phy's reset binding, again described by ethernet-phy.yaml. + + phy-reset-gpios: + deprecated: true + description: + Should specify the gpio for phy reset. + + phy-reset-duration: + deprecated: true + description: + Reset duration in milliseconds. Should present only if property + "phy-reset-gpios" is available. Missing the property will have the + duration be 1 millisecond. Numbers greater than 1000 are invalid + and 1 millisecond will be used instead. + + phy-reset-active-high: + deprecated: true + description: + If present then the reset sequence using the GPIO specified in the + "phy-reset-gpios" property is reversed (H=reset state, L=operation state). + + phy-reset-post-delay: + deprecated: true + description: + Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay + milliseconds will be observed after the phy-reset-gpios has been toggled. + Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms. + Other delays are invalid. + +required: + - compatible + - reg + - interrupts + +# FIXME: We had better set additionalProperties to false to avoid invalid or at +# least undocumented properties. However, PHY may have a deprecated option to +# place PHY OF properties in the MAC node, such as Micrel PHY, and we can find +# these boards which is based on i.MX6QDL. 
+additionalProperties: false + +examples: + - | + ethernet@83fec000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fec000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <&reg_fec_supply>; + }; + + ethernet@83fed000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fed000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <&reg_fec_supply>; + phy-handle = <&ethphy0>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt deleted file mode 100644 index 9b543789cd52..000000000000 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ /dev/null @@ -1,95 +0,0 @@ -* Freescale Fast Ethernet Controller (FEC) - -Required properties: -- compatible : Should be "fsl,<soc>-fec" -- reg : Address and length of the register set for the device -- interrupts : Should contain fec interrupt -- phy-mode : See ethernet.txt file in the same directory - -Optional properties: -- phy-supply : regulator that powers the Ethernet PHY. -- phy-handle : phandle to the PHY device connected to this device. -- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. Use instead of phy-handle. -- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the tx queue number, otherwise set tx queue - number to 1. -- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the rx queue number, otherwise set rx queue - number to 1. -- fsl,magic-packet : If present, indicates that the hardware supports waking - up via magic packet. -- fsl,err006687-workaround-present: If present indicates that the system has - the hardware workaround for ERR006687 applied and does not need a software - workaround. -- fsl,stop-mode: register bits of stop mode control, the format is - <&gpr req_gpr req_bit>. - gpr is the phandle to general purpose register node. - req_gpr is the gpr register offset for ENET stop request. - req_bit is the gpr bit offset for ENET stop request. - -interrupt-names: names of the interrupts listed in interrupts property in - the same order. The defaults if not specified are - __Number of interrupts__ __Default__ - 1 "int0" - 2 "int0", "pps" - 3 "int0", "int1", "int2" - 4 "int0", "int1", "int2", "pps" - The order may be changed as long as they correspond to the interrupts - property. Currently, only i.mx7 uses "int1" and "int2". They correspond to - tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. - For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse - per second interrupt associated with 1588 precision time protocol(PTP). - -Optional subnodes: -- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes - according to phy.txt in the same directory - -Deprecated optional properties: - To avoid these, create a phy node according to phy.txt in the same - directory, and point the fec's "phy-handle" property to it. Then use - the phy's reset binding, again described by phy.txt. -- phy-reset-gpios : Should specify the gpio for phy reset -- phy-reset-duration : Reset duration in milliseconds. Should present - only if property "phy-reset-gpios" is available.
Missing the property - will have the duration be 1 millisecond. Numbers greater than 1000 are - invalid and 1 millisecond will be used instead. -- phy-reset-active-high : If present then the reset sequence using the GPIO - specified in the "phy-reset-gpios" property is reversed (H=reset state, - L=operation state). -- phy-reset-post-delay : Post reset delay in milliseconds. If present then - a delay of phy-reset-post-delay milliseconds will be observed after the - phy-reset-gpios has been toggled. Can be omitted thus no delay is - observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - -Example: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <&reg_fec_supply>; -}; - -Example with phy specified: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <&reg_fec_supply>; - phy-handle = <&ethphy>; - mdio { - clock-frequency = <5000000>; - ethphy: ethernet-phy@6 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <6>; - max-speed = <100>; - }; - }; -}; diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst index eb8a1a14e919..f10fe53aa9fe 100644 --- a/Documentation/driver-api/nfc/nfc-hci.rst +++ b/Documentation/driver-api/nfc/nfc-hci.rst @@ -181,7 +181,7 @@ xmit_from_hci(): The llc must be registered with nfc before it can be used. Do that by calling:: - nfc_llc_register(const char *name, struct nfc_llc_ops *ops); + nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); Again, note that the llc does not handle the physical link. It is thus very easy to mix any physical link with any llc for a given chip driver. diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 62f2aab8eaec..31cfd7d674a6 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -501,6 +501,18 @@ fail_over_mac This option was added in bonding version 3.2.0. The "follow" policy was added in bonding version 3.3.0. +lacp_active + Option specifying whether to send LACPDU frames periodically. + + off or 0 + LACPDU frames act as "speak when spoken to". + + on or 1 + LACPDU frames are sent along the configured links + periodically. See lacp_rate for more details. + + The default is on. + lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst index ee40fcc5ddff..62f4a4aff6ec 100644 --- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst @@ -9,3 +9,4 @@ DPAA2 Documentation dpio-driver ethernet-driver mac-phy-support + switch-driver diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst new file mode 100644 index 000000000000..8bf411b857d4 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst @@ -0,0 +1,217 @@ +.. 
SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +=================== +DPAA2 Switch driver +=================== + +:Copyright: |copy| 2021 NXP + +The DPAA2 Switch driver probes on the Datapath Switch (DPSW) object which can +be instantiated on the following DPAA2 SoCs and their variants: LS2088A and +LX2160A. + +The driver uses the switch device driver model and exposes each switch port as +a network interface, which can be included in a bridge or used as a standalone +interface. Traffic switched between ports is offloaded into the hardware. + +The DPSW can have ports connected to DPNIs or to DPMACs for external access. +:: + + [ethA] [ethB] [ethC] [ethD] [ethE] [ethF] + : : : : : : + : : : : : : + [dpaa2-eth] [dpaa2-eth] [ dpaa2-switch ] + : : : : : : kernel + ============================================================================= + : : : : : : hardware + [DPNI] [DPNI] [============= DPSW =================] + | | | | | | + | ---------- | [DPMAC] [DPMAC] + ------------------------------- | | + | | + [PHY] [PHY] + +Creating an Ethernet Switch +=========================== + +The dpaa2-switch driver probes on DPSW devices found on the fsl-mc bus. These +devices can either be created statically through the boot time configuration +file - DataPath Layout (DPL) - or at runtime using the DPAA2 object APIs +(already incorporated into the restool userspace tool). + +At the moment, the dpaa2-switch driver imposes the following restrictions on +the DPSW object that it will probe: + + * The number of FDBs should be at least equal to the number of switch + interfaces. This is necessary so that separation of switch ports can be + done, i.e. when not under a bridge, each switch port will have its own FDB. + :: + + fsl_dpaa2_switch dpsw.0: The number of FDBs is lower than the number of ports, cannot probe + + * Both the broadcast and flooding configuration should be per FDB. This + enables the driver to restrict the broadcast and flooding domains of each + FDB depending on the switch ports that are sharing it (i.e. are under the + same bridge). + :: + + fsl_dpaa2_switch dpsw.0: Flooding domain is not per FDB, cannot probe + fsl_dpaa2_switch dpsw.0: Broadcast domain is not per FDB, cannot probe + + * The control interface of the switch should not be disabled + (DPSW_OPT_CTRL_IF_DIS not passed as a create time option). Without the + control interface, the driver is not capable of providing proper Rx/Tx traffic + support on the switch port netdevices. + :: + + fsl_dpaa2_switch dpsw.0: Control Interface is disabled, cannot probe + +Besides the configuration of the actual DPSW object, the dpaa2-switch driver +will need the following DPAA2 objects: + + * 1 DPMCP - A Management Command Portal object is needed for any interaction + with the MC firmware. + + * 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path + on the control interface. + + * Access to at least one DPIO object (Software Portal) is needed for any + enqueue/dequeue operation to be performed on the control interface queues. + The DPIO object will be shared, no need for a private one. + +Switching features +================== + +The driver supports the configuration of L2 forwarding rules in hardware for +port bridging as well as standalone usage of the independent switch interfaces.
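As a quick illustration of the standalone mode (an editorial example, not part of the patch; ethX stands in for one of the switch port netdevices), a port that is not under a bridge can be used like any other network interface::

    $ ip addr add 192.168.0.2/24 dev ethX
    $ ip link set dev ethX up

The bridged mode, and the constraints the hardware puts on it, are described next.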
+ +The hardware is not configurable with respect to VLAN awareness, thus any DPAA2 +switch port should be used only in usecases with a VLAN aware bridge:: + + $ ip link add dev br0 type bridge vlan_filtering 1 + + $ ip link add dev br1 type bridge + $ ip link set dev ethX master br1 + Error: fsl_dpaa2_switch: Cannot join a VLAN-unaware bridge + +Topology and loop detection through STP is supported when ``stp_state 1`` is +used at bridge create :: + + $ ip link add dev br0 type bridge vlan_filtering 1 stp_state 1 + +L2 FDB manipulation (add/delete/dump) is supported. + +HW FDB learning can be configured on each switch port independently through +bridge commands. When the HW learning is disabled, a fast age procedure will be +run and any previously learnt addresses will be removed. +:: + + $ bridge link set dev ethX learning off + $ bridge link set dev ethX learning on + +Restricting the unknown unicast and multicast flooding domain is supported, but +not independently of each other:: + + $ ip link set dev ethX type bridge_slave flood off mcast_flood off + $ ip link set dev ethX type bridge_slave flood off mcast_flood on + Error: fsl_dpaa2_switch: Cannot configure multicast flooding independently of unicast. + +Broadcast flooding on a switch port can be disabled/enabled through the brport sysfs:: + + $ echo 0 > /sys/bus/fsl-mc/devices/dpsw.Y/net/ethX/brport/broadcast_flood + +Offloads +======== + +Routing actions (redirect, trap, drop) +-------------------------------------- + +The DPAA2 switch is able to offload flow-based redirection of packets making +use of ACL tables. Shared filter blocks are supported by sharing a single ACL +table between multiple ports. + +The following flow keys are supported: + + * Ethernet: dst_mac/src_mac + * IPv4: dst_ip/src_ip/ip_proto/tos + * VLAN: vlan_id/vlan_prio/vlan_tpid/vlan_dei + * L4: dst_port/src_port + +Also, the matchall filter can be used to redirect the entire traffic received +on a port. + +As per flow actions, the following are supported: + + * drop + * mirred egress redirect + * trap + +Each ACL entry (filter) can be setup with only one of the listed +actions. + +Example 1: send frames received on eth4 with a SA of 00:01:02:03:04:05 to the +CPU:: + + $ tc qdisc add dev eth4 clsact + $ tc filter add dev eth4 ingress flower src_mac 00:01:02:03:04:05 skip_sw action trap + +Example 2: drop frames received on eth4 with VID 100 and PCP of 3:: + + $ tc filter add dev eth4 ingress protocol 802.1q flower skip_sw vlan_id 100 vlan_prio 3 action drop + +Example 3: redirect all frames received on eth4 to eth1:: + + $ tc filter add dev eth4 ingress matchall action mirred egress redirect dev eth1 + +Example 4: Use a single shared filter block on both eth5 and eth6:: + + $ tc qdisc add dev eth5 ingress_block 1 clsact + $ tc qdisc add dev eth6 ingress_block 1 clsact + $ tc filter add block 1 ingress flower dst_mac 00:01:02:03:04:04 skip_sw \ + action trap + $ tc filter add block 1 ingress protocol ipv4 flower src_ip 192.168.1.1 skip_sw \ + action mirred egress redirect dev eth3 + +Mirroring +~~~~~~~~~ + +The DPAA2 switch supports only per port mirroring and per VLAN mirroring. +Adding mirroring filters in shared blocks is also supported. + +When using the tc-flower classifier with the 802.1q protocol, only the +''vlan_id'' key will be accepted. 
Mirroring based on any other fields from the +802.1q protocol will be rejected:: + + $ tc qdisc add dev eth8 ingress_block 1 clsact + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_prio 3 action mirred egress mirror dev eth6 + Error: fsl_dpaa2_switch: Only matching on VLAN ID supported. + We have an error talking to the kernel + +If a mirroring VLAN filter is requested on a port, the VLAN must be +installed on the switch port in question either using ''bridge'' or by creating +a VLAN upper device if the switch port is used as a standalone interface:: + + $ tc qdisc add dev eth8 ingress_block 1 clsact + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6 + Error: VLAN must be installed on the switch port. + We have an error talking to the kernel + + $ bridge vlan add vid 200 dev eth8 + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6 + + $ ip link add link eth8 name eth8.200 type vlan id 200 + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6 + +Also, it should be noted that the mirrored traffic will be subject to the same +egress restrictions as any other traffic. This means that when a mirrored +packet reaches the mirror port, it will get dropped if the VLAN found in the +packet is not installed on that port. + +The DPAA2 switch supports only a single mirroring destination, thus multiple +mirror rules can be installed but their ''to'' port has to be the same:: + + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6 + $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 100 action mirred egress mirror dev eth7 + Error: fsl_dpaa2_switch: Multiple mirror ports not supported. + We have an error talking to the kernel diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst new file mode 100644 index 000000000000..4562a6e4782f --- /dev/null +++ b/Documentation/networking/devlink/hns3.rst @@ -0,0 +1,25 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +hns3 devlink support +==================== + +This document describes the devlink features implemented by the ``hns3`` +device driver. + +The ``hns3`` driver supports reloading via ``DEVLINK_CMD_RELOAD``. + +Info versions +============= + +The ``hns3`` driver reports the following versions: + +.. list-table:: devlink info versions implemented + :widths: 10 10 80 + + * - Name + - Type + - Description + * - ``fw`` + - running + - Used to represent the firmware version. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index b3b9e0692088..03f56ed2961f 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -34,6 +34,7 @@ parameters, info versions, and other features it supports.
:maxdepth: 1 bnxt + hns3 ionic ice mlx4 diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index 3e2221f4abe4..5f13905b12e0 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -320,13 +320,6 @@ Examples for low-level BPF: ret #-1 drop: ret #0 -**(Accelerated) VLAN w/ id 10**:: - - ld vlan_tci - jneq #10, drop - ret #-1 - drop: ret #0 - **icmp random packet sampling, 1 in 4**:: ldh [12] @@ -358,6 +351,22 @@ Examples for low-level BPF: bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ +Examples for low-level BPF extension: + +**Packet for interface index 13**:: + + ld ifidx + jneq #13, drop + ret #-1 + drop: ret #0 + +**(Accelerated) VLAN w/ id 10**:: + + ld vlan_tci + jneq #10, drop + ret #-1 + drop: ret #0 + The above example code can be placed into a file (here called "foo"), and then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf and cls_bpf understands and can directly be loaded with. Example with above diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index e9ce55992aa9..58bc8cd367c6 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -57,6 +57,7 @@ Contents: gen_stats gtp ila + ioam6-sysctl ipddp ip_dynaddr ipsec @@ -68,6 +69,7 @@ Contents: l2tp lapb-module mac80211-injection + mctp mpls-sysctl mptcp-sysctl multiqueue diff --git a/Documentation/networking/ioam6-sysctl.rst b/Documentation/networking/ioam6-sysctl.rst new file mode 100644 index 000000000000..c18cab2c481a --- /dev/null +++ b/Documentation/networking/ioam6-sysctl.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +IOAM6 Sysfs variables +===================== + + +/proc/sys/net/conf/<iface>/ioam6_* variables: +============================================= + +ioam6_enabled - BOOL + Accept (= enabled) or ignore (= disabled) IPv6 IOAM options on ingress + for this interface. + + * 0 - disabled (default) + * 1 - enabled + +ioam6_id - SHORT INTEGER + Define the IOAM id of this interface. + + Default is ~0. + +ioam6_id_wide - INTEGER + Define the wide IOAM id of this interface. + + Default is ~0. diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 316c7dfa9693..d91ab28718d4 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1926,6 +1926,23 @@ fib_notify_on_flag_change - INTEGER - 1 - Emit notifications. - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change. +ioam6_id - INTEGER + Define the IOAM id of this node. Uses only 24 bits out of 32 in total. + + Min: 0 + Max: 0xFFFFFF + + Default: 0xFFFFFF + +ioam6_id_wide - LONG INTEGER + Define the wide IOAM id of this node. Uses only 56 bits out of 64 in + total. Can be different from ioam6_id. + + Min: 0 + Max: 0xFFFFFFFFFFFFFF + + Default: 0xFFFFFFFFFFFFFF + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst new file mode 100644 index 000000000000..6100cdc220f6 --- /dev/null +++ b/Documentation/networking/mctp.rst @@ -0,0 +1,213 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================== +Management Component Transport Protocol (MCTP) +============================================== + +net/mctp/ contains protocol support for MCTP, as defined by DMTF standard +DSP0236. 
Physical interface drivers ("bindings" in the specification) are +provided in drivers/net/mctp/. + +The core code provides a socket-based interface to send and receive MCTP +messages, through an AF_MCTP, SOCK_DGRAM socket. + +Structure: interfaces & networks +================================ + +The kernel models the local MCTP topology through two items: interfaces and +networks. + +An interface (or "link") is an instance of an MCTP physical transport binding +(as defined by DSP0236, section 3.2.47), likely connected to a specific hardware +device. This is represented as a ``struct netdevice``. + +A network defines a unique address space for MCTP endpoints by endpoint-ID +(described by DSP0236, section 3.2.31). A network has a user-visible identifier +to allow references from userspace. Route definitions are specific to one +network. + +Interfaces are associated with one network. A network may be associated with one +or more interfaces. + +If multiple networks are present, each may contain endpoint IDs (EIDs) that are +also present on other networks. + +Sockets API +=========== + +Protocol definitions +-------------------- + +MCTP uses ``AF_MCTP`` / ``PF_MCTP`` for the address- and protocol- families. +Since MCTP is message-based, only ``SOCK_DGRAM`` sockets are supported. + +.. code-block:: C + + int sd = socket(AF_MCTP, SOCK_DGRAM, 0); + +The only (current) value for the ``protocol`` argument is 0. + +As with all socket address families, source and destination addresses are +specified with a ``sockaddr`` type, with a single-byte endpoint address: + +.. code-block:: C + + typedef __u8 mctp_eid_t; + + struct mctp_addr { + mctp_eid_t s_addr; + }; + + struct sockaddr_mctp { + unsigned short int smctp_family; + int smctp_network; + struct mctp_addr smctp_addr; + __u8 smctp_type; + __u8 smctp_tag; + }; + + #define MCTP_NET_ANY 0x0 + #define MCTP_ADDR_ANY 0xff + + +Syscall behaviour +----------------- + +The following sections describe the MCTP-specific behaviours of the standard +socket system calls. These behaviours have been chosen to map closely to the +existing sockets APIs. + +``bind()`` : set local socket address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sockets that receive incoming request packets will bind to a local address, +using the ``bind()`` syscall. + +.. code-block:: C + + struct sockaddr_mctp addr; + + addr.smctp_family = AF_MCTP; + addr.smctp_network = MCTP_NET_ANY; + addr.smctp_addr.s_addr = MCTP_ADDR_ANY; + addr.smctp_type = MCTP_TYPE_PLDM; + addr.smctp_tag = MCTP_TAG_OWNER; + + int rc = bind(sd, (struct sockaddr *)&addr, sizeof(addr)); + +This establishes the local address of the socket. Incoming MCTP messages that +match the network, address, and message type will be received by this socket. +The reference to 'incoming' is important here; a bound socket will only receive +messages with the TO bit set, to indicate an incoming request message, rather +than a response. + +The ``smctp_tag`` value will configure the tags accepted from the remote side of +this socket. Given the above, the only valid value is ``MCTP_TAG_OWNER``, which +will result in remotely "owned" tags being routed to this socket. Since +``MCTP_TAG_OWNER`` is set, the 3 least-significant bits of ``smctp_tag`` are not +used; callers must set them to zero. + +A ``smctp_network`` value of ``MCTP_NET_ANY`` will configure the socket to +receive incoming packets from any locally-connected network. A specific network +value will cause the socket to only receive incoming messages from that network. 
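For instance, a minimal sketch of such a network-restricted bind (an editorial example, not part of the patch; it reuses ``MCTP_TYPE_PLDM`` from the example above and assumes a network with ID 1 exists):

.. code-block:: C

    struct sockaddr_mctp addr = { 0 };

    addr.smctp_family = AF_MCTP;
    addr.smctp_network = 1; /* a specific network, not MCTP_NET_ANY */
    addr.smctp_addr.s_addr = MCTP_ADDR_ANY;
    addr.smctp_type = MCTP_TYPE_PLDM;
    addr.smctp_tag = MCTP_TAG_OWNER;

    /* only incoming requests arriving from network 1 are delivered */
    int rc = bind(sd, (struct sockaddr *)&addr, sizeof(addr));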
+ The ``smctp_addr`` field specifies a local address to bind to. A value of +``MCTP_ADDR_ANY`` configures the socket to receive messages addressed to any +local destination EID. + +The ``smctp_type`` field specifies which message types to receive. Only the +lower 7 bits of the type are matched on incoming messages (i.e., the +most-significant IC bit is not part of the match). This results in the socket +receiving packets with and without a message integrity check footer. + +``sendto()``, ``sendmsg()``, ``send()`` : transmit an MCTP message +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An MCTP message is transmitted using one of the ``sendto()``, ``sendmsg()`` or +``send()`` syscalls. Using ``sendto()`` as the primary example: + +.. code-block:: C + + struct sockaddr_mctp addr; + char buf[14]; + ssize_t len; + + /* set message destination */ + addr.smctp_family = AF_MCTP; + addr.smctp_network = 0; + addr.smctp_addr.s_addr = 8; + addr.smctp_tag = MCTP_TAG_OWNER; + addr.smctp_type = MCTP_TYPE_ECHO; + + /* arbitrary message to send, with message-type header */ + buf[0] = MCTP_TYPE_ECHO; + memcpy(buf + 1, "hello, world!", sizeof(buf) - 1); + + len = sendto(sd, buf, sizeof(buf), 0, + (struct sockaddr_mctp *)&addr, sizeof(addr)); + +The network and address fields of ``addr`` define the remote address to send to. +If ``smctp_tag`` has the ``MCTP_TAG_OWNER`` bit set, the kernel will ignore any bits set +in ``MCTP_TAG_VALUE``, and generate a tag value suitable for the destination +EID. If ``MCTP_TAG_OWNER`` is not set, the message will be sent with the tag +value as specified. If a tag value cannot be allocated, the system call will +report an errno of ``EAGAIN``. + +The application must provide the message type byte as the first byte of the +message buffer passed to ``sendto()``. If a message integrity check is to be +included in the transmitted message, it must also be provided in the message +buffer, and the most-significant bit of the message type byte must be 1. + +The ``sendmsg()`` system call allows a more compact argument interface, and the +message buffer to be specified as a scatter-gather list. At present no ancillary +message types (used for the ``msg_control`` data passed to ``sendmsg()``) are +defined. + +Transmitting a message on an unconnected socket with ``MCTP_TAG_OWNER`` +specified will cause an allocation of a tag, if no valid tag is already +allocated for that destination. The (destination-eid,tag) tuple acts as an +implicit local socket address, to allow the socket to receive responses to this +outgoing message. If any previous allocation has been performed (for a +different remote EID), that allocation is lost. + +Sockets will only receive responses to requests they have sent (with TO=1) and +may only respond (with TO=0) to requests they have received. + +``recvfrom()``, ``recvmsg()``, ``recv()`` : receive an MCTP message +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An MCTP message can be received by an application using one of the +``recvfrom()``, ``recvmsg()``, or ``recv()`` system calls. Using ``recvfrom()`` +as the primary example: + +.. 
code-block:: C + + struct sockaddr_mctp addr; + socklen_t addrlen; + char buf[14]; + ssize_t len; + + addrlen = sizeof(addr); + + len = recvfrom(sd, buf, sizeof(buf), 0, + (struct sockaddr_mctp *)&addr, &addrlen); + + /* We can expect addr to describe an MCTP address */ + assert(addrlen >= sizeof(addr)); + assert(addr.smctp_family == AF_MCTP); + + printf("received %zd bytes from remote EID %d\n", len, addr.smctp_addr.s_addr); + +The address argument to ``recvfrom`` and ``recvmsg`` is populated with the +remote address of the incoming message, including tag value (this will be needed +in order to reply to the message). + +The first byte of the message buffer will contain the message type byte. If an +integrity check follows the message, it will be included in the received buffer. + +The ``recv()`` system call behaves in a similar way, but does not provide a +remote address to the application. Therefore, it is only useful if the +remote address is already known, or the message does not require a reply. + +Like the send calls, sockets will only receive responses to requests they have +sent (TO=1) and may only respond (TO=0) to requests they have received. diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 17bdcb746dcf..9e4cccb90b87 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -222,6 +222,35 @@ ndo_do_ioctl: Synchronization: rtnl_lock() semaphore. Context: process + This is only called by network subsystems internally, + not by user space calling ioctl as it was before + Linux 5.14. + +ndo_siocbond: + Synchronization: rtnl_lock() semaphore. + Context: process + + Used by the bonding driver for the SIOCBOND family of + ioctl commands. + +ndo_siocwandev: + Synchronization: rtnl_lock() semaphore. + Context: process + + Used by the drivers/net/wan framework to handle + the SIOCWANDEV ioctl with the if_settings structure. + +ndo_siocdevprivate: + Synchronization: rtnl_lock() semaphore. + Context: process + + This is used to implement SIOCDEVPRIVATE ioctl helpers. + These should not be added to new drivers, so don't use + them. + +ndo_eth_ioctl: + Synchronization: rtnl_lock() semaphore. + Context: process + ndo_get_stats: Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU. Context: atomic (can't sleep under rwlock or RCU) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 7db3985359bc..a722eb30e014 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -625,7 +625,7 @@ interfaces of a DSA switch to share the same PHC. By design, PTP timestamping with a DSA switch does not need any special handling in the driver for the host port it is attached to. However, when the host port also supports PTP timestamping, DSA will take care of intercepting -the ``.ndo_do_ioctl`` calls towards the host port, and block attempts to enable +the ``.ndo_eth_ioctl`` calls towards the host port, and block attempts to enable hardware timestamping on it. This is because the SO_TIMESTAMPING API does not allow the delivery of multiple hardware timestamps for the same packet, so anybody else except for the DSA switch port must be prevented from doing so. @@ -688,7 +688,7 @@ ethtool ioctl operations for them need to be mediated by their respective MAC driver. Therefore, as opposed to DSA switches, modifications need to be done to each individual MAC driver for PHY timestamping support.
This entails: -- Checking, in ``.ndo_do_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)`` +- Checking, in ``.ndo_eth_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)`` is true or not. If it is, then the MAC driver should not process this request but instead pass it on to the PHY using ``phy_mii_ioctl()``. @@ -747,7 +747,7 @@ For example, a typical driver design for TX timestamping might be to split the transmission part into 2 portions: 1. "TX": checks whether PTP timestamping has been previously enabled through - the ``.ndo_do_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the + the ``.ndo_eth_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the current skb requires a TX timestamp ("``skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP``"). If this is true, it sets the "``skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS``" flag. Note: as diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..41fcfdb24a81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5684,6 +5684,7 @@ DPAA2 ETHERNET SWITCH DRIVER M: Ioana Ciornei <ioana.ciornei@nxp.com> L: netdev@vger.kernel.org S: Maintained +F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* F: drivers/net/ethernet/freescale/dpaa2/dpsw* @@ -10388,6 +10389,7 @@ F: net/core/skmsg.c F: net/core/sock_map.c F: net/ipv4/tcp_bpf.c F: net/ipv4/udp_bpf.c +F: net/unix/unix_bpf.c LANDLOCK SECURITY MODULE M: Mickaël Salaün <mic@digikod.net> @@ -11030,6 +11032,18 @@ F: drivers/mailbox/arm_mhuv2.c F: include/linux/mailbox/arm_mhuv2_message.h F: Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml +MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) +M: Jeremy Kerr <jk@codeconstruct.com.au> +M: Matt Johnston <matt@codeconstruct.com.au> +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/networking/mctp.rst +F: drivers/net/mctp/ +F: include/net/mctp.h +F: include/net/mctpdevice.h +F: include/net/netns/mctp.h +F: net/mctp/ + MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 M: Michael Kerrisk <mtk.manpages@gmail.com> L: linux-man@vger.kernel.org @@ -11327,6 +11341,12 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* +MAXLINEAR ETHERNET PHY DRIVER +M: Xu Liang <lxu@maxlinear.com> +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/phy/mxl-gpy.c + MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER R: Yasushi SHOJI <yashi@spacecubics.com> L: linux-can@vger.kernel.org diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 6b3daba60987..1dd9baf4a6c2 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -129,6 +129,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index 98ccc81ca6d9..8e41c8b7bd70 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -189,7 +189,7 @@ status = "disabled"; }; - fec: fec@50038000 { + fec: ethernet@50038000 { compatible = "fsl,imx35-fec", "fsl,imx27-fec"; reg = <0x50038000 0x4000>; clocks = <&clks 46>, <&clks 8>; diff --git a/arch/arm/boot/dts/imx6q-novena.dts b/arch/arm/boot/dts/imx6q-novena.dts index 52e3567d1859..225cf6b7a7a4 100644 --- a/arch/arm/boot/dts/imx6q-novena.dts +++ b/arch/arm/boot/dts/imx6q-novena.dts @@ -222,20 +222,30 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet_novena>; phy-mode = "rgmii"; + phy-handle = 
<ðphy>; phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; - rxc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - txc-skew-ps = <3000>; - txen-skew-ps = <0>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <3000>; - txd1-skew-ps = <3000>; - txd2-skew-ps = <3000>; - txd3-skew-ps = <3000>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + rxc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + txc-skew-ps = <3000>; + txen-skew-ps = <0>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <3000>; + txd1-skew-ps = <3000>; + txd2-skew-ps = <3000>; + txd3-skew-ps = <3000>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi index ead7ba27e105..563bf9d44fe0 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi @@ -316,12 +316,22 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <ðphy>; phy-reset-gpios = <&gpio7 18 GPIO_ACTIVE_LOW>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &gpmi { diff --git a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi index d526f01a2c52..ac34709e9741 100644 --- a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi @@ -190,23 +190,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <ðphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi index a0917823c244..c96f4d7e1e0d 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi @@ -332,23 +332,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <ðphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; 
fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index 1243677b5f97..49da30d7510c 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -265,23 +265,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <ðphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi index fdc3aa9d544d..eb9a0b104f1c 100644 --- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi @@ -324,20 +324,30 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <ðphy>; phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index c6d1c63f7905..5e6bef230dc7 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -216,7 +216,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; phy-supply = <®_fec1_pwdn>; phy-handle = <ðphy1_0>; fsl,magic-packet; diff --git a/arch/arm/boot/dts/imx7d-mba7.dts b/arch/arm/boot/dts/imx7d-mba7.dts index 23856a8d4b8c..36ef6a3cdb0b 100644 --- a/arch/arm/boot/dts/imx7d-mba7.dts +++ b/arch/arm/boot/dts/imx7d-mba7.dts @@ -23,7 +23,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; 
phy-supply = <®_fec2_pwdn>; phy-handle = <ðphy2_0>; fsl,magic-packet; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index e7648c3b8390..1608a48495b6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -920,7 +920,7 @@ }; fec1: ethernet@30be0000 { - compatible = "fsl,imx8mm-fec", "fsl,imx6sx-fec"; + compatible = "fsl,imx8mm-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec"; reg = <0x30be0000 0x10000>; interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index d4231e061403..e6de293865b0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -923,7 +923,7 @@ }; fec1: ethernet@30be0000 { - compatible = "fsl,imx8mn-fec", "fsl,imx6sx-fec"; + compatible = "fsl,imx8mn-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec"; reg = <0x30be0000 0x10000>; interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi index f5f58959f65c..46da21af3702 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi @@ -17,9 +17,9 @@ }; &fec1 { - compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec"; + compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec"; }; &fec2 { - compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec"; + compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec"; }; diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 23a9fb73c04f..79c1a750e357 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -5,6 +5,9 @@ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #ifdef CONFIG_COMPAT @@ -27,13 +30,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u16 __compat_uid16_t; typedef u16 __compat_gid16_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u32 compat_dev_t; typedef s32 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -103,13 +102,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 65975712a22d..53f015a1b0a7 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -9,20 +9,25 @@ #include <asm/page.h> #include <asm/ptrace.h> +typedef s32 __compat_uid_t; +typedef s32 __compat_gid_t; +typedef __compat_uid_t __compat_uid32_t; +typedef __compat_gid_t __compat_gid32_t; +#define __compat_uid32_t __compat_uid32_t +#define __compat_gid32_t __compat_gid32_t + +#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... 
*/ +#define _COMPAT_NSIG_BPW 32 +typedef u32 compat_sigset_word; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "mips\0\0\0" -typedef s32 __compat_uid_t; -typedef s32 __compat_gid_t; -typedef __compat_uid_t __compat_uid32_t; -typedef __compat_gid_t __compat_gid32_t; -typedef u32 compat_mode_t; typedef u32 compat_dev_t; typedef u32 compat_nlink_t; typedef s32 compat_ipc_pid_t; -typedef s32 compat_caddr_t; typedef struct { s32 val[2]; } compat_fsid_t; @@ -89,13 +94,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0x7fffffffUL -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... */ -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index cdf404a831b2..1eaf6a1ca561 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 1a609d38f667..b5d90e82b65d 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -8,6 +8,9 @@ #include <linux/sched.h> #include <linux/thread_info.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -15,13 +18,9 @@ typedef u32 __compat_uid_t; typedef u32 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u32 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; struct compat_stat { compat_dev_t st_dev; /* dev_t is 32 bits on parisc */ @@ -96,13 +95,6 @@ struct compat_sigcontext { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 5b5351cdcb33..8baaad52d799 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -121,6 +121,8 @@ #define SO_NETNS_COOKIE 0x4045 +#define SO_BUF_LOCK 0x4046 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 9191fc29e6ed..e33dcf134cdd 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -19,13 +19,9 @@ typedef u32 __compat_uid_t; typedef u32 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u32 compat_mode_t; typedef u32 compat_dev_t; typedef s16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -85,13 +81,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 
20f169b6db4e..36dbf5043fc0 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -53,8 +53,6 @@ extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, int num_devices, const char *buf); -struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, - char *bus_id); extern int ccwgroup_set_online(struct ccwgroup_device *gdev); extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index ea5b9c34b7be..8d49505b4a43 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -9,6 +9,9 @@ #include <linux/sched/task_stack.h> #include <linux/thread_info.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \ @@ -55,13 +58,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef struct { @@ -155,13 +154,6 @@ struct compat_statfs64 { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index b85842cda99f..8b63410e830f 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -6,6 +6,9 @@ */ #include <linux/types.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -13,13 +16,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef s16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -115,13 +114,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0x7fffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff #ifdef CONFIG_COMPAT diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 92675dc380fa..e80ee8641ac3 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_NETNS_COOKIE 0x0050 +#define SO_BUF_LOCK 0x0051 + #if !defined(__KERNEL__) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index be09c7eac89f..4ae01cdb99de 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,6 +12,9 @@ #include <asm/user32.h> #include <asm/unistd.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -19,13 +22,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 
compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -92,13 +91,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 6fd8410a3910..2dfb5fea13af 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -29,6 +29,7 @@ typedef struct { #define SA_X32_ABI 0x01000000u #ifndef CONFIG_COMPAT +#define compat_sigset_t compat_sigset_t typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 16d76f814e9b..0fe6aacef3db 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1961,6 +1961,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) stack_size += 8; /* room for return value of orig_call */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; /* room for IP address argument */ + if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip patched call instruction and point orig_call to actual * body of the kernel function. @@ -1974,6 +1977,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT1(0x53); /* push rbx */ + if (flags & BPF_TRAMP_F_IP_ARG) { + /* Store IP address of the traced function: + * mov rax, QWORD PTR [rbp + 8] + * sub rax, X86_PATCH_SIZE + * mov QWORD PTR [rbp - stack_size], rax + */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size); + + /* Continue with stack_size for regs storage, stack will + * be correctly restored with 'leave' instruction. 
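An aside on the BPF_TRAMP_F_IP_ARG hunk above: the address arithmetic it emits is simple enough to state in plain C. The sketch below is an editor's illustration, not code from the patch; it only assumes that X86_PATCH_SIZE is the length of the patched call instruction (5 bytes on x86-64) and that the trampoline finds its return address at [rbp + 8].

#include <stdint.h>

#define X86_PATCH_SIZE_SKETCH 5	/* length of the patched call insn on x86-64 (assumed) */

/* The return address on the trampoline stack points just past the call
 * patched into the traced function's prologue; stepping back by the
 * patch size therefore yields the traced function's own entry IP. */
static inline uint64_t traced_func_ip(uint64_t trampoline_ret_addr)
{
	return trampoline_ret_addr - X86_PATCH_SIZE_SKETCH;
}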
+ */ + stack_size -= 8; + } + save_regs(m, &prog, nr_args, stack_size); if (flags & BPF_TRAMP_F_CALL_ORIG) { diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 9e4bd751db79..81ce81a75fc6 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3536,7 +3536,7 @@ static int idt77252_preset(struct idt77252_dev *card) return -1; } if (!(pci_command & PCI_COMMAND_IO)) { - printk("%s: PCI_COMMAND: %04x (???)\n", + printk("%s: PCI_COMMAND: %04x (?)\n", card->name, pci_command); deinit_card(card); return (-1); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 09c8ab5e0959..b3691de8ac06 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_remove); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id) { struct fsl_mc_device *mc_bus_dev, *endpoint; struct fsl_mc_obj_desc endpoint_desc = {{ 0 }}; @@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent); strcpy(endpoint1.type, mc_dev->obj_desc.type); endpoint1.id = mc_dev->obj_desc.id; + endpoint1.if_id = if_id; err = dprc_get_connection(mc_bus_dev->mc_io, 0, mc_bus_dev->mc_handle, diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 4dd1077354af..b33b9d75e8af 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -32,6 +32,7 @@ * @edl: emergency download mode firmware path (if any) * @bar_num: PCI base address register to use for MHI MMIO register space * @dma_data_width: DMA transfer word size (32 or 64 bits) + * @mru_default: default MRU size for MBIM network packets * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead * of inband wake support (such as sdx24) */ @@ -42,6 +43,7 @@ struct mhi_pci_dev_info { const char *edl; unsigned int bar_num; unsigned int dma_data_width; + unsigned int mru_default; bool sideband_wake; }; @@ -272,6 +274,7 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, .sideband_wake = false, }; @@ -664,6 +667,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mhi_cntrl->status_cb = mhi_pci_status_cb; mhi_cntrl->runtime_get = mhi_pci_runtime_get; mhi_cntrl->runtime_put = mhi_pci_runtime_put; + mhi_cntrl->mru = info->mru_default; if (info->sideband_wake) { mhi_cntrl->wake_get = mhi_pci_wake_get_nop; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 6eaefea0520e..5ac53dcb3a6a 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4050,16 +4050,15 @@ static int hdlcdev_close(struct net_device *dev) * called by network layer to process IOCTL call to network device * * dev pointer to network device structure - * ifr pointer to network interface request structure - * cmd IOCTL command code + * ifs pointer to network interface settings structure * * returns 0 if success, otherwise error code */ -static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int hdlcdev_wan_ioctl(struct net_device *dev, struct if_settings *ifs) { const size_t size = sizeof(sync_serial_settings); sync_serial_settings new_line; - 
sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; MGSLPC_INFO *info = dev_to_port(dev); unsigned int flags; @@ -4070,17 +4069,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (info->port.count) return -EBUSY; - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - memset(&new_line, 0, size); - switch(ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: /* return current sync_serial_settings */ - ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_SYNC_SERIAL; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } @@ -4148,9 +4144,8 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) tty_kref_put(tty); } return 0; - default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -4225,7 +4220,7 @@ static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = hdlcdev_ioctl, + .ndo_siocwandev = hdlcdev_wan_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abf60f4d9203..0aa8629fdf62 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1745,10 +1745,10 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, { struct ipoib_dev_priv *priv = ipoib_priv(dev); - if (!priv->rn_ops->ndo_do_ioctl) + if (!priv->rn_ops->ndo_eth_ioctl) return -EOPNOTSUPP; - return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); + return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd); } static int ipoib_dev_init(struct net_device *dev) @@ -2078,7 +2078,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -2093,7 +2093,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, }; static const struct net_device_ops ipoib_netdev_default_pf = { diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 6977f8248df7..995c613086aa 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -431,10 +431,10 @@ config VSOCKMON config MHI_NET tristate "MHI network driver" depends on MHI_BUS - select WWAN help This is the network driver for MHI bus. It can be used with - QCOM based WWAN modems (like SDX55). Say Y or M. + QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55). + Say Y or M. endif # NET_CORE @@ -483,6 +483,8 @@ config NET_SB1000 source "drivers/net/phy/Kconfig" +source "drivers/net/mctp/Kconfig" + source "drivers/net/mdio/Kconfig" source "drivers/net/pcs/Kconfig" @@ -604,4 +606,11 @@ config NET_FAILOVER a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. +config NETDEV_LEGACY_INIT + bool + depends on ISA + help + Drivers that call netdev_boot_setup_check() should select this + symbol, everything else no longer needs it. 
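For readers unfamiliar with the boot-setup path this Kconfig symbol gates, here is a hedged sketch of a consumer; mydrv_isa_probe and the 16-byte I/O window are invented for illustration, only netdev_boot_setup_check() is from the code this series moves into Space.c.

#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/netdevice.h>

/* Hypothetical legacy ISA probe; a driver shaped like this would now have
 * to "select NETDEV_LEGACY_INIT" to keep netdev_boot_setup_check() around. */
static int __init mydrv_isa_probe(struct net_device *dev)
{
	/* Copies irq/base_addr/mem_start/mem_end from a matching
	 * "netdev=..." or "ether=..." boot entry into *dev. */
	if (!netdev_boot_setup_check(dev))
		return -ENODEV;		/* nothing configured for this name */

	if (!request_region(dev->base_addr, 16, dev->name))
		return -EBUSY;

	return register_netdev(dev);
}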
+ endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7ffd2d03efaf..739838623cf6 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o obj-$(CONFIG_MACVTAP) += macvtap.o obj-$(CONFIG_MII) += mii.o obj-$(CONFIG_MDIO) += mdio.o -obj-$(CONFIG_NET) += Space.o loopback.o +obj-$(CONFIG_NET) += loopback.o +obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o obj-$(CONFIG_NETCONSOLE) += netconsole.o obj-y += phy/ obj-y += mdio/ @@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o -obj-$(CONFIG_MHI_NET) += mhi/ +obj-$(CONFIG_MHI_NET) += mhi_net.o # # Networking Drivers @@ -69,6 +70,7 @@ obj-$(CONFIG_WAN) += wan/ obj-$(CONFIG_WLAN) += wireless/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_WWAN) += wwan/ +obj-$(CONFIG_MCTP) += mctp/ obj-$(CONFIG_VMXNET3) += vmxnet3/ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o diff --git a/drivers/net/Space.c b/drivers/net/Space.c index df79e7370bcc..49e67c9fb5a4 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -30,6 +30,148 @@ #include <linux/netlink.h> #include <net/Space.h> +/* + * This structure holds boot-time configured netdevice settings. They + * are then used in the device probing. + */ +struct netdev_boot_setup { + char name[IFNAMSIZ]; + struct ifmap map; +}; +#define NETDEV_BOOT_SETUP_MAX 8 + + +/****************************************************************************** + * + * Device Boot-time Settings Routines + * + ******************************************************************************/ + +/* Boot time configuration table */ +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; + +/** + * netdev_boot_setup_add - add new setup entry + * @name: name of the device + * @map: configured settings for the device + * + * Adds new setup entry to the dev_boot_setup list. The function + * returns 0 on error and 1 on success. This is a generic routine to + * all netdevices. + */ +static int netdev_boot_setup_add(char *name, struct ifmap *map) +{ + struct netdev_boot_setup *s; + int i; + + s = dev_boot_setup; + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { + memset(s[i].name, 0, sizeof(s[i].name)); + strlcpy(s[i].name, name, IFNAMSIZ); + memcpy(&s[i].map, map, sizeof(s[i].map)); + break; + } + } + + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; +} + +/** + * netdev_boot_setup_check - check boot time settings + * @dev: the netdevice + * + * Check boot time settings for the device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found, 1 if they are. + */ +int netdev_boot_setup_check(struct net_device *dev) +{ + struct netdev_boot_setup *s = dev_boot_setup; + int i; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && + !strcmp(dev->name, s[i].name)) { + dev->irq = s[i].map.irq; + dev->base_addr = s[i].map.base_addr; + dev->mem_start = s[i].map.mem_start; + dev->mem_end = s[i].map.mem_end; + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(netdev_boot_setup_check); + +/** + * netdev_boot_base - get address from boot time settings + * @prefix: prefix for network device + * @unit: id for network device + * + * Check boot time settings for the base address of device. + * The found settings are set for the device to be used + * later in the device probing. 
+ * Returns 0 if no settings found. + */ +static unsigned long netdev_boot_base(const char *prefix, int unit) +{ + const struct netdev_boot_setup *s = dev_boot_setup; + char name[IFNAMSIZ]; + int i; + + sprintf(name, "%s%d", prefix, unit); + + /* + * If device already registered then return base of 1 + * to indicate not to probe for this interface + */ + if (__dev_get_by_name(&init_net, name)) + return 1; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) + if (!strcmp(name, s[i].name)) + return s[i].map.base_addr; + return 0; +} + +/* + * Saves at boot time configured settings for any netdevice. + */ +static int __init netdev_boot_setup(char *str) +{ + int ints[5]; + struct ifmap map; + + str = get_options(str, ARRAY_SIZE(ints), ints); + if (!str || !*str) + return 0; + + /* Save settings */ + memset(&map, 0, sizeof(map)); + if (ints[0] > 0) + map.irq = ints[1]; + if (ints[0] > 1) + map.base_addr = ints[2]; + if (ints[0] > 2) + map.mem_start = ints[3]; + if (ints[0] > 3) + map.mem_end = ints[4]; + + /* Add new entry to the list */ + return netdev_boot_setup_add(str, &map); +} + +__setup("netdev=", netdev_boot_setup); + +static int __init ether_boot_setup(char *str) +{ + return netdev_boot_setup(str); +} +__setup("ether=", ether_boot_setup); + + /* A unified ethernet device probe. This is the easiest way to have every * ethernet adaptor have the name "eth[0123...]". */ @@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = { #ifdef CONFIG_SMC9194 {smc_init, 0}, #endif -#ifdef CONFIG_CS89x0 -#ifndef CONFIG_CS89x0_PLATFORM +#ifdef CONFIG_CS89x0_ISA {cs89x0_probe, 0}, #endif -#endif -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */ - {i82596_probe, 0}, /* I82596 */ -#endif #ifdef CONFIG_NI65 {ni65_probe, 0}, #endif {NULL, 0}, }; -static struct devprobe2 m68k_probes[] __initdata = { -#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */ - {atarilance_probe, 0}, -#endif -#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */ - {sun3lance_probe, 0}, -#endif -#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */ - {sun3_82586_probe, 0}, -#endif -#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */ - {apne_probe, 0}, -#endif -#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ - {mvme147lance_probe, 0}, -#endif - {NULL, 0}, -}; - /* Unified ethernet device probe, segmented per architecture and * per bus interface. This drives the legacy devices only for now. */ @@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit) if (base_addr == 1) return; - (void)(probe_list2(unit, m68k_probes, base_addr == 0) && - probe_list2(unit, isa_probes, base_addr == 0)); + probe_list2(unit, isa_probes, base_addr == 0); } /* Statically configured drivers -- order matters here. 
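The ordering matters because probe_list2(), which already lives in Space.c and is only sketched here with minor simplification, stops at the first driver that claims a unit, so earlier table entries shadow later ones:

#include <linux/err.h>
#include <linux/netdevice.h>

/* Editor's sketch of the probe walk; field names follow struct devprobe2. */
struct devprobe2_sketch {
	struct net_device *(*probe)(int unit);
	int status;	/* non-zero once an autoprobe of this driver failed */
};

static int probe_list2_sketch(int unit, struct devprobe2_sketch *p, int autoprobe)
{
	struct net_device *dev;

	for (; p->probe; p++) {
		if (autoprobe && p->status)
			continue;	/* don't retry a failed autoprobe */
		dev = p->probe(unit);
		if (!IS_ERR(dev))
			return 0;	/* first match wins the unit */
		p->status = PTR_ERR(dev);
	}
	return -ENODEV;
}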
*/ @@ -130,10 +247,6 @@ static int __init net_olddevs_init(void) { int num; -#ifdef CONFIG_SBNI - for (num = 0; num < 8; ++num) - sbni_probe(num); -#endif for (num = 0; num < 8; ++num) ethif_probe2(num); @@ -142,9 +255,6 @@ static int __init net_olddevs_init(void) cops_probe(1); cops_probe(2); #endif -#ifdef CONFIG_LTPC - ltpc_probe(); -#endif return 0; } diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 43918398f0d3..90b9f1d6eda9 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -52,7 +52,9 @@ config LTPC config COPS tristate "COPS LocalTalk PC support" - depends on DEV_APPLETALK && (ISA || EISA) + depends on DEV_APPLETALK && ISA + depends on NETDEVICES + select NETDEV_LEGACY_INIT help This allows you to use COPS AppleTalk cards to connect to LocalTalk networks. You also need version 1.3.3 or later of the netatalk diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index 51cf5eca9c7f..5566daefbff4 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -54,11 +54,12 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, static int ipddp_create(struct ipddp_route *new_rt); static int ipddp_delete(struct ipddp_route *rt); static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt); -static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static const struct net_device_ops ipddp_netdev_ops = { .ndo_start_xmit = ipddp_xmit, - .ndo_do_ioctl = ipddp_ioctl, + .ndo_siocdevprivate = ipddp_siocdevprivate, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -268,15 +269,18 @@ static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt) return NULL; } -static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - struct ipddp_route __user *rt = ifr->ifr_data; struct ipddp_route rcp, rcp2, *rp; + if (in_compat_syscall()) + return -EOPNOTSUPP; + if(!capable(CAP_NET_ADMIN)) return -EPERM; - if(copy_from_user(&rcp, rt, sizeof(rcp))) + if (copy_from_user(&rcp, data, sizeof(rcp))) return -EFAULT; switch(cmd) @@ -296,7 +300,7 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) spin_unlock_bh(&ipddp_route_lock); if (rp) { - if (copy_to_user(rt, &rcp2, + if (copy_to_user(data, &rcp2, sizeof(struct ipddp_route))) return -EFAULT; return 0; diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 69c270885ff0..1f8925e75b3f 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = { .ndo_set_rx_mode = set_multicast_list, }; -struct net_device * __init ltpc_probe(void) +static struct net_device * __init ltpc_probe(void) { struct net_device *dev; int err = -ENOMEM; @@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str) } __setup("ltpc=", ltpc_setup); -#endif /* MODULE */ +#endif static struct net_device *dev_ltpc; -#ifdef MODULE - MODULE_LICENSE("GPL"); module_param(debug, int, 0); module_param_hw(io, int, ioport, 0); @@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void) return PTR_ERR_OR_ZERO(dev_ltpc); } module_init(ltpc_module_init); -#endif static void __exit ltpc_cleanup(void) { diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c 
index 6908822d9773..a4a202b9a0a2 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port); +static void ad_periodic_machine(struct port *port, struct bond_params bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); @@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at + * @bond_params: bond parameters we will use * * Turn ntt flag on periodically to perform periodic transmission of lacpdu's. */ -static void ad_periodic_machine(struct port *port) +static void ad_periodic_machine(struct port *port, struct bond_params bond_params) { periodic_states_t last_state; @@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port) /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) - ) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || + !bond_params.lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port); + ad_periodic_machine(port, bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 31730efa7538..3ba5f4871162 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -732,7 +732,7 @@ static int bond_check_dev_link(struct bonding *bond, BMSR_LSTATUS : 0; /* Ethtool can't be used, fallback to MII ioctls. */ - ioctl = slave_ops->ndo_do_ioctl; + ioctl = slave_ops->ndo_eth_ioctl; if (ioctl) { /* TODO: set pointer to correct ioctl on a per team member * basis to make this more efficient. that is, once @@ -756,7 +756,7 @@ } } - /* If reporting, report that either there's no dev->do_ioctl, + /* If reporting, report that either there's no ndo_eth_ioctl, * or both SIOCGMIIREG and get_link failed (meaning that we * cannot report link status). If not reporting, pretend * we're ok.
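To unpack the MII fallback that the renamed ndo_eth_ioctl hook serves here: bond_check_dev_link() asks the slave for its PHY address and then reads the basic mode status register. A hedged sketch of that two-ioctl sequence follows (the helper name is invented; the ioctls and MII constants are the standard ones):

#include <linux/mii.h>
#include <linux/netdevice.h>
#include <linux/sockios.h>
#include <linux/string.h>

static int mii_link_up_sketch(struct net_device *slave_dev,
			      int (*ioctl)(struct net_device *,
					   struct ifreq *, int))
{
	struct ifreq ifr;
	struct mii_ioctl_data *mii;

	memset(&ifr, 0, sizeof(ifr));
	strscpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
	mii = if_mii(&ifr);

	if (ioctl(slave_dev, &ifr, SIOCGMIIPHY))	/* learn the PHY id */
		return 0;

	mii->reg_num = MII_BMSR;			/* basic mode status */
	if (ioctl(slave_dev, &ifr, SIOCGMIIREG))
		return 0;

	return !!(mii->val_out & BMSR_LSTATUS);		/* 1 = link up */
}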
@@ -1733,7 +1733,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (!bond->params.use_carrier && slave_dev->ethtool_ops->get_link == NULL && - slave_ops->ndo_do_ioctl == NULL) { + slave_ops->ndo_eth_ioctl == NULL) { slave_warn(bond_dev, slave_dev, "no link monitoring support\n"); } @@ -2252,7 +2252,6 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); - bond_upper_dev_unlink(bond, slave); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -2261,6 +2260,8 @@ static int __bond_release_one(struct net_device *bond_dev, if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); + bond_upper_dev_unlink(bond, slave); + if (bond_mode_can_use_xmit_hash(bond)) bond_update_slave_arr(bond, slave); @@ -3962,20 +3963,13 @@ static void bond_get_stats(struct net_device *bond_dev, rcu_read_unlock(); } -static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) +static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) { struct bonding *bond = netdev_priv(bond_dev); - struct net_device *slave_dev = NULL; - struct ifbond k_binfo; - struct ifbond __user *u_binfo = NULL; - struct ifslave k_sinfo; - struct ifslave __user *u_sinfo = NULL; struct mii_ioctl_data *mii = NULL; - struct bond_opt_value newval; - struct net *net; - int res = 0; + int res; - netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); + netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd); switch (cmd) { case SIOCGMIIPHY: @@ -4000,7 +3994,28 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd } return 0; - case BOND_INFO_QUERY_OLD: + default: + res = -EOPNOTSUPP; + } + + return res; +} + +static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct net_device *slave_dev = NULL; + struct ifbond k_binfo; + struct ifbond __user *u_binfo = NULL; + struct ifslave k_sinfo; + struct ifslave __user *u_sinfo = NULL; + struct bond_opt_value newval; + struct net *net; + int res = 0; + + netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); + + switch (cmd) { case SIOCBONDINFOQUERY: u_binfo = (struct ifbond __user *)ifr->ifr_data; @@ -4012,7 +4027,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd return -EFAULT; return 0; - case BOND_SLAVE_INFO_QUERY_OLD: case SIOCBONDSLAVEINFOQUERY: u_sinfo = (struct ifslave __user *)ifr->ifr_data; @@ -4042,19 +4056,15 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd return -ENODEV; switch (cmd) { - case BOND_ENSLAVE_OLD: case SIOCBONDENSLAVE: res = bond_enslave(bond_dev, slave_dev, NULL); break; - case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); break; - case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: res = bond_set_dev_addr(bond_dev, slave_dev); break; - case BOND_CHANGE_ACTIVE_OLD: case SIOCBONDCHANGEACTIVE: bond_opt_initstr(&newval, slave_dev->name); res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE, @@ -4067,6 +4077,29 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd return res; } +static int bond_siocdevprivate(struct net_device *bond_dev, struct ifreq *ifr, + void __user *data, int cmd) +{ + struct ifreq ifrdata = { .ifr_data = data }; + + switch (cmd) { + case BOND_INFO_QUERY_OLD: + return 
bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDINFOQUERY); + case BOND_SLAVE_INFO_QUERY_OLD: + return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDSLAVEINFOQUERY); + case BOND_ENSLAVE_OLD: + return bond_do_ioctl(bond_dev, ifr, SIOCBONDENSLAVE); + case BOND_RELEASE_OLD: + return bond_do_ioctl(bond_dev, ifr, SIOCBONDRELEASE); + case BOND_SETHWADDR_OLD: + return bond_do_ioctl(bond_dev, ifr, SIOCBONDSETHWADDR); + case BOND_CHANGE_ACTIVE_OLD: + return bond_do_ioctl(bond_dev, ifr, SIOCBONDCHANGEACTIVE); + } + + return -EOPNOTSUPP; +} + static void bond_change_rx_flags(struct net_device *bond_dev, int change) { struct bonding *bond = netdev_priv(bond_dev); @@ -4955,7 +4988,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_start_xmit = bond_start_xmit, .ndo_select_queue = bond_select_queue, .ndo_get_stats64 = bond_get_stats, - .ndo_do_ioctl = bond_do_ioctl, + .ndo_eth_ioctl = bond_eth_ioctl, + .ndo_siocbond = bond_do_ioctl, + .ndo_siocdevprivate = bond_siocdevprivate, .ndo_change_rx_flags = bond_change_rx_flags, .ndo_set_rx_mode = bond_set_rx_mode, .ndo_change_mtu = bond_change_mtu, @@ -5443,6 +5478,7 @@ static int bond_check_params(struct bond_params *params) params->downdelay = downdelay; params->peer_notif_delay = 0; params->use_carrier = use_carrier; + params->lacp_active = 1; params->lacp_fast = lacp_fast; params->primary[0] = 0; params->primary_reselect = primary_reselect_value; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 0561ece1ba45..5d54e11d18fa 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, + [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, @@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BOND_AD_LACP_ACTIVE]) { + int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); + + bond_opt_initval(&newval, lacp_active); + err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval); + if (err) + return err; + } + if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); @@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ @@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb, packets_per_slave)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, + bond->params.lacp_active)) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cf25de6f46d..a8fde3bc458f 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -58,6 +58,8 @@ static int 
bond_option_lp_interval_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_pps_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_select_set(struct bonding *bond, @@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_lacp_active[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_opt_value bond_lacp_rate_tbl[] = { { "slow", AD_LACP_SLOW, 0}, { "fast", AD_LACP_FAST, 0}, @@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_updelay_set }, + [BOND_OPT_LACP_ACTIVE] = { + .id = BOND_OPT_LACP_ACTIVE, + .name = "lacp_active", + .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to", + .flags = BOND_OPTFLAG_IFDOWN, + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_lacp_active, + .set = bond_option_lacp_active_set + }, [BOND_OPT_LACP_RATE] = { .id = BOND_OPT_LACP_RATE, .name = "lacp_rate", @@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond, return 0; } +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", + newval->string, newval->value); + bond->params.lacp_active = newval->value; + + return 0; +} + static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 0fb1da361bb1..f3e3bfd72556 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq) struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); + seq_printf(seq, "LACP active: %s\n", + (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5f9e9a240226..b9e9842fed94 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d, static DEVICE_ATTR(peer_notif_delay, 0644, bonding_show_peer_notif_delay, bonding_sysfs_store_option); -/* Show the LACP interval. */ -static ssize_t bonding_show_lacp(struct device *d, - struct device_attribute *attr, - char *buf) +/* Show the LACP activity and interval. 
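The entire behavioural content of the new lacp_active option is the one extra condition added to ad_periodic_machine() earlier in this series; condensed into a hedged one-liner (function name invented for illustration):

#include <linux/types.h>

/* With lacp_active off, LACPDUs are only scheduled while the partner is
 * itself active, the "speak when spoken to" mode named in the desc. */
static bool lacp_periodic_allowed_sketch(bool lacp_active, bool partner_active)
{
	return lacp_active || partner_active;
}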
*/ +static ssize_t bonding_show_lacp_active(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + const struct bond_opt_value *val; + + val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active); + + return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active); +} +static DEVICE_ATTR(lacp_active, 0644, + bonding_show_lacp_active, bonding_sysfs_store_option); + +static ssize_t bonding_show_lacp_rate(struct device *d, + struct device_attribute *attr, + char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; @@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d, return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast); } static DEVICE_ATTR(lacp_rate, 0644, - bonding_show_lacp, bonding_sysfs_store_option); + bonding_show_lacp_rate, bonding_sysfs_store_option); static ssize_t bonding_show_min_links(struct device *d, struct device_attribute *attr, @@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_downdelay.attr, &dev_attr_updelay.attr, &dev_attr_peer_notif_delay.attr, + &dev_attr_lacp_active.attr, &dev_attr_lacp_rate.attr, &dev_attr_ad_select.attr, &dev_attr_xmit_hash_policy.attr, diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index e355d3974977..fff259247d52 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -97,7 +97,8 @@ config CAN_AT91 config CAN_FLEXCAN tristate "Support for Freescale FLEXCAN based chips" - depends on OF && HAS_IOMEM + depends on OF || COLDFIRE || COMPILE_TEST + depends on HAS_IOMEM help Say Y here if you want to support for Freescale FlexCAN. diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 04d0bb3ffe89..b06af90a9964 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -43,14 +43,14 @@ enum at91_reg { }; /* Mailbox registers (0 <= i <= 15) */ -#define AT91_MMR(i) (enum at91_reg)(0x200 + ((i) * 0x20)) -#define AT91_MAM(i) (enum at91_reg)(0x204 + ((i) * 0x20)) -#define AT91_MID(i) (enum at91_reg)(0x208 + ((i) * 0x20)) -#define AT91_MFID(i) (enum at91_reg)(0x20C + ((i) * 0x20)) -#define AT91_MSR(i) (enum at91_reg)(0x210 + ((i) * 0x20)) -#define AT91_MDL(i) (enum at91_reg)(0x214 + ((i) * 0x20)) -#define AT91_MDH(i) (enum at91_reg)(0x218 + ((i) * 0x20)) -#define AT91_MCR(i) (enum at91_reg)(0x21C + ((i) * 0x20)) +#define AT91_MMR(i) ((enum at91_reg)(0x200 + ((i) * 0x20))) +#define AT91_MAM(i) ((enum at91_reg)(0x204 + ((i) * 0x20))) +#define AT91_MID(i) ((enum at91_reg)(0x208 + ((i) * 0x20))) +#define AT91_MFID(i) ((enum at91_reg)(0x20C + ((i) * 0x20))) +#define AT91_MSR(i) ((enum at91_reg)(0x210 + ((i) * 0x20))) +#define AT91_MDL(i) ((enum at91_reg)(0x214 + ((i) * 0x20))) +#define AT91_MDH(i) ((enum at91_reg)(0x218 + ((i) * 0x20))) +#define AT91_MCR(i) ((enum at91_reg)(0x21C + ((i) * 0x20))) /* Register bits */ #define AT91_MR_CANEN BIT(0) @@ -87,19 +87,19 @@ enum at91_mb_mode { }; /* Interrupt mask bits */ -#define AT91_IRQ_ERRA (1 << 16) -#define AT91_IRQ_WARN (1 << 17) -#define AT91_IRQ_ERRP (1 << 18) -#define AT91_IRQ_BOFF (1 << 19) -#define AT91_IRQ_SLEEP (1 << 20) -#define AT91_IRQ_WAKEUP (1 << 21) -#define AT91_IRQ_TOVF (1 << 22) -#define AT91_IRQ_TSTP (1 << 23) -#define AT91_IRQ_CERR (1 << 24) -#define AT91_IRQ_SERR (1 << 25) -#define AT91_IRQ_AERR (1 << 26) -#define AT91_IRQ_FERR (1 << 27) -#define AT91_IRQ_BERR (1 << 28) +#define AT91_IRQ_ERRA BIT(16) +#define AT91_IRQ_WARN BIT(17) +#define AT91_IRQ_ERRP BIT(18) +#define AT91_IRQ_BOFF 
BIT(19) +#define AT91_IRQ_SLEEP BIT(20) +#define AT91_IRQ_WAKEUP BIT(21) +#define AT91_IRQ_TOVF BIT(22) +#define AT91_IRQ_TSTP BIT(23) +#define AT91_IRQ_CERR BIT(24) +#define AT91_IRQ_SERR BIT(25) +#define AT91_IRQ_AERR BIT(26) +#define AT91_IRQ_FERR BIT(27) +#define AT91_IRQ_BERR BIT(28) #define AT91_IRQ_ERR_ALL (0x1fff0000) #define AT91_IRQ_ERR_FRAME (AT91_IRQ_CERR | AT91_IRQ_SERR | \ @@ -163,7 +163,7 @@ static const struct can_bittiming_const at91_bittiming_const = { .tseg2_min = 2, .tseg2_max = 8, .sjw_max = 4, - .brp_min = 2, + .brp_min = 2, .brp_max = 128, .brp_inc = 1, }; @@ -281,19 +281,20 @@ static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg) } static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg, - u32 value) + u32 value) { writel_relaxed(value, priv->reg_base + reg); } static inline void set_mb_mode_prio(const struct at91_priv *priv, - unsigned int mb, enum at91_mb_mode mode, int prio) + unsigned int mb, enum at91_mb_mode mode, + int prio) { at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16)); } static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb, - enum at91_mb_mode mode) + enum at91_mb_mode mode) { set_mb_mode_prio(priv, mb, mode, 0); } @@ -316,8 +317,7 @@ static void at91_setup_mailboxes(struct net_device *dev) unsigned int i; u32 reg_mid; - /* - * Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first + /* Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first * mailbox is disabled. The next 11 mailboxes are used as a * reception FIFO. The last mailbox is configured with * overwrite option. The overwrite flag indicates a FIFO @@ -368,7 +368,7 @@ static int at91_set_bittiming(struct net_device *dev) } static int at91_get_berr_counter(const struct net_device *dev, - struct can_berr_counter *bec) + struct can_berr_counter *bec) { const struct at91_priv *priv = netdev_priv(dev); u32 reg_ecr = at91_read(priv, AT91_ECR); @@ -423,8 +423,7 @@ static void at91_chip_stop(struct net_device *dev, enum can_state state) priv->can.state = state; } -/* - * theory of operation: +/* theory of operation: * * According to the datasheet priority 0 is the highest priority, 15 * is the lowest. If two mailboxes have the same priority level the @@ -486,8 +485,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv), 0); - /* - * we have to stop the queue and deliver all messages in case + /* we have to stop the queue and deliver all messages in case * of a prio+mb counter wrap around. This is the case if * tx_next buffer prio and mailbox equals 0. * @@ -515,6 +513,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) static inline void at91_activate_rx_low(const struct at91_priv *priv) { u32 mask = get_mb_rx_low_mask(priv); + at91_write(priv, AT91_TCR, mask); } @@ -526,9 +525,10 @@ static inline void at91_activate_rx_low(const struct at91_priv *priv) * Reenables given mailbox for reception of new CAN messages */ static inline void at91_activate_rx_mb(const struct at91_priv *priv, - unsigned int mb) + unsigned int mb) { u32 mask = 1 << mb; + at91_write(priv, AT91_TCR, mask); } @@ -568,7 +568,7 @@ static void at91_rx_overflow_err(struct net_device *dev) * given can frame. "mb" and "cf" must be valid. 
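The at91_read_mb() body that follows decodes the mailbox registers; for orientation, here are the two status-register fields it extracts (editor's helpers, but the 19:16 DLC field and AT91_MSR_MRTR are from this driver):

#include <linux/types.h>

static inline unsigned int at91_msr_dlc_sketch(u32 reg_msr)
{
	return (reg_msr >> 16) & 0xf;	/* data length code in bits 19:16 */
}

static inline bool at91_msr_is_rtr_sketch(u32 reg_msr)
{
	return reg_msr & AT91_MSR_MRTR;	/* remote frame: no data to read */
}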
*/ static void at91_read_mb(struct net_device *dev, unsigned int mb, - struct can_frame *cf) + struct can_frame *cf) { const struct at91_priv *priv = netdev_priv(dev); u32 reg_msr, reg_mid; @@ -582,9 +582,9 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb, reg_msr = at91_read(priv, AT91_MSR(mb)); cf->len = can_cc_dlc2len((reg_msr >> 16) & 0xf); - if (reg_msr & AT91_MSR_MRTR) + if (reg_msr & AT91_MSR_MRTR) { cf->can_id |= CAN_RTR_FLAG; - else { + } else { *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb)); *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb)); } @@ -685,7 +685,7 @@ static int at91_poll_rx(struct net_device *dev, int quota) if (priv->rx_next > get_mb_rx_low_last(priv) && reg_sr & get_mb_rx_low_mask(priv)) netdev_info(dev, - "order of incoming frames cannot be guaranteed\n"); + "order of incoming frames cannot be guaranteed\n"); again: for (mb = find_next_bit(addr, get_mb_tx_first(priv), priv->rx_next); @@ -718,7 +718,7 @@ static int at91_poll_rx(struct net_device *dev, int quota) } static void at91_poll_err_frame(struct net_device *dev, - struct can_frame *cf, u32 reg_sr) + struct can_frame *cf, u32 reg_sr) { struct at91_priv *priv = netdev_priv(dev); @@ -796,8 +796,7 @@ static int at91_poll(struct napi_struct *napi, int quota) if (reg_sr & get_irq_mb_rx(priv)) work_done += at91_poll_rx(dev, quota - work_done); - /* - * The error bits are clear on read, + /* The error bits are clear on read, * so use saved value from irq handler. */ reg_sr |= priv->reg_sr; @@ -807,6 +806,7 @@ if (work_done < quota) { /* enable IRQs for frame errors and all mailboxes >= rx_next */ u32 reg_ier = AT91_IRQ_ERR_FRAME; + reg_ier |= get_irq_mb_rx(priv) & ~AT91_MB_MASK(priv->rx_next); napi_complete_done(napi, work_done); @@ -816,8 +816,7 @@ return work_done; } -/* - * theory of operation: +/* theory of operation: * * priv->tx_echo holds the number of the oldest can_frame put for * transmission into the hardware, but not yet ACKed by the CAN tx @@ -846,8 +845,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) /* Disable irq for this TX mailbox */ at91_write(priv, AT91_IDR, 1 << mb); - /* - * only echo if mailbox signals us a transfer + /* only echo if mailbox signals us a transfer * complete (MSR_MRDY). Otherwise it's a transfer * abort. "can_bus_off()" takes care of the skbs * parked in the echo queue. @@ -862,8 +860,7 @@ } } - /* - * restart queue if we don't have a wrap around but restart if + /* restart queue if we don't have a wrap around but restart if * we get a TX int for the last can frame directly before a * wrap around.
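The wrap-around rule spelled out in the comment above reduces to a mask test; a hedged sketch follows, where next_mask stands in for the driver's real next-mask computation:

#include <linux/types.h>

/* tx_next and tx_echo are free-running prio+mailbox counters; low bits of
 * zero mean the counter has just wrapped. Restart the queue unless a wrap
 * is pending with its final frame still unacknowledged. */
static inline bool may_restart_queue_sketch(unsigned int tx_next,
					    unsigned int tx_echo,
					    unsigned int next_mask)
{
	return (tx_next & next_mask) != 0 || (tx_echo & next_mask) == 0;
}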
*/ @@ -873,7 +870,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) } static void at91_irq_err_state(struct net_device *dev, - struct can_frame *cf, enum can_state new_state) + struct can_frame *cf, enum can_state new_state) { struct at91_priv *priv = netdev_priv(dev); u32 reg_idr = 0, reg_ier = 0; @@ -883,8 +880,7 @@ static void at91_irq_err_state(struct net_device *dev, switch (priv->can.state) { case CAN_STATE_ERROR_ACTIVE: - /* - * from: ERROR_ACTIVE + /* from: ERROR_ACTIVE * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF * => : there was a warning int */ @@ -900,8 +896,7 @@ static void at91_irq_err_state(struct net_device *dev, } fallthrough; case CAN_STATE_ERROR_WARNING: - /* - * from: ERROR_ACTIVE, ERROR_WARNING + /* from: ERROR_ACTIVE, ERROR_WARNING * to : ERROR_PASSIVE, BUS_OFF * => : error passive int */ @@ -917,8 +912,7 @@ static void at91_irq_err_state(struct net_device *dev, } break; case CAN_STATE_BUS_OFF: - /* - * from: BUS_OFF + /* from: BUS_OFF * to : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE */ if (new_state <= CAN_STATE_ERROR_PASSIVE) { @@ -935,12 +929,10 @@ static void at91_irq_err_state(struct net_device *dev, break; } - /* process state changes depending on the new state */ switch (new_state) { case CAN_STATE_ERROR_ACTIVE: - /* - * actually we want to enable AT91_IRQ_WARN here, but + /* actually we want to enable AT91_IRQ_WARN here, but * it screws up the system under certain * circumstances. so just enable AT91_IRQ_ERRP, thus * the "fallthrough" @@ -983,7 +975,7 @@ static void at91_irq_err_state(struct net_device *dev, } static int at91_get_state_by_bec(const struct net_device *dev, - enum can_state *state) + enum can_state *state) { struct can_berr_counter bec; int err; @@ -1004,7 +996,6 @@ static int at91_get_state_by_bec(const struct net_device *dev, return 0; } - static void at91_irq_err(struct net_device *dev) { struct at91_priv *priv = netdev_priv(dev); @@ -1018,15 +1009,15 @@ static void at91_irq_err(struct net_device *dev) reg_sr = at91_read(priv, AT91_SR); /* we need to look at the unmasked reg_sr */ - if (unlikely(reg_sr & AT91_IRQ_BOFF)) + if (unlikely(reg_sr & AT91_IRQ_BOFF)) { new_state = CAN_STATE_BUS_OFF; - else if (unlikely(reg_sr & AT91_IRQ_ERRP)) + } else if (unlikely(reg_sr & AT91_IRQ_ERRP)) { new_state = CAN_STATE_ERROR_PASSIVE; - else if (unlikely(reg_sr & AT91_IRQ_WARN)) + } else if (unlikely(reg_sr & AT91_IRQ_WARN)) { new_state = CAN_STATE_ERROR_WARNING; - else if (likely(reg_sr & AT91_IRQ_ERRA)) + } else if (likely(reg_sr & AT91_IRQ_ERRA)) { new_state = CAN_STATE_ERROR_ACTIVE; - else { + } else { netdev_err(dev, "BUG! hardware in undefined state\n"); return; } @@ -1053,8 +1044,7 @@ static void at91_irq_err(struct net_device *dev) priv->can.state = new_state; } -/* - * interrupt handler +/* interrupt handler */ static irqreturn_t at91_irq(int irq, void *dev_id) { @@ -1075,8 +1065,7 @@ static irqreturn_t at91_irq(int irq, void *dev_id) /* Receive or error interrupt? -> napi */ if (reg_sr & (get_irq_mb_rx(priv) | AT91_IRQ_ERR_FRAME)) { - /* - * The error bits are clear on read, + /* The error bits are clear on read, * save for later use. 
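The clear-on-read handling here is a common split between hard IRQ and NAPI poll; below is a self-contained hedged sketch of the pattern with invented names:

#include <linux/types.h>

struct cor_cache_sketch {
	u32 saved_sr;	/* bits latched by the hard IRQ handler */
};

static void hard_irq_latch_sketch(struct cor_cache_sketch *c, u32 sr_now)
{
	/* reading SR cleared these bits in hardware; keep them for poll */
	c->saved_sr |= sr_now;
}

static u32 poll_status_sketch(struct cor_cache_sketch *c, u32 sr_now)
{
	u32 sr = sr_now | c->saved_sr;	/* merge live and latched bits */

	c->saved_sr = 0;		/* consumed by this poll cycle */
	return sr;
}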
*/ priv->reg_sr = reg_sr; @@ -1133,8 +1122,7 @@ static int at91_open(struct net_device *dev) return err; } -/* - * stop CAN bus activity +/* stop CAN bus activity */ static int at91_close(struct net_device *dev) { @@ -1176,8 +1164,8 @@ static const struct net_device_ops at91_netdev_ops = { .ndo_change_mtu = can_change_mtu, }; -static ssize_t at91_sysfs_show_mb0_id(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t mb0_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct at91_priv *priv = netdev_priv(to_net_dev(dev)); @@ -1187,8 +1175,9 @@ static ssize_t at91_sysfs_show_mb0_id(struct device *dev, return snprintf(buf, PAGE_SIZE, "0x%03x\n", priv->mb0_id); } -static ssize_t at91_sysfs_set_mb0_id(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t mb0_id_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct net_device *ndev = to_net_dev(dev); struct at91_priv *priv = netdev_priv(ndev); @@ -1222,7 +1211,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev, return ret; } -static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id); +static DEVICE_ATTR_RW(mb0_id); static struct attribute *at91_sysfs_attrs[] = { &dev_attr_mb0_id.attr, diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index e38c2566aff4..147c23d7dab7 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -47,7 +47,7 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[], } if (data[IFLA_CAN_DATA_BITTIMING]) { - if (!is_can_fd || !data[IFLA_CAN_BITTIMING]) + if (!is_can_fd) return -EOPNOTSUPP; } @@ -132,10 +132,13 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], priv->ctrlmode |= maskedflags; /* CAN_CTRLMODE_FD can only be set when driver supports FD */ - if (priv->ctrlmode & CAN_CTRLMODE_FD) + if (priv->ctrlmode & CAN_CTRLMODE_FD) { dev->mtu = CANFD_MTU; - else + } else { dev->mtu = CAN_MTU; + memset(&priv->data_bittiming, 0, + sizeof(priv->data_bittiming)); + } } if (data[IFLA_CAN_RESTART_MS]) { diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index ab2c1543786c..37b0cc65237b 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2014 Protonic Holland, * David Jander - * Copyright (C) 2014-2017 Pengutronix, + * Copyright (C) 2014-2021 Pengutronix, * Marc Kleine-Budde <kernel@pengutronix.de> */ @@ -174,10 +174,8 @@ can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pending) { - struct sk_buff_head skb_queue; unsigned int i; - - __skb_queue_head_init(&skb_queue); + int received = 0; for (i = offload->mb_first; can_rx_offload_le(offload, i, offload->mb_last); @@ -191,26 +189,12 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, if (IS_ERR_OR_NULL(skb)) continue; - __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); - } - - if (!skb_queue_empty(&skb_queue)) { - unsigned long flags; - u32 queue_len; - - spin_lock_irqsave(&offload->skb_queue.lock, flags); - skb_queue_splice_tail(&skb_queue, &offload->skb_queue); - spin_unlock_irqrestore(&offload->skb_queue.lock, flags); - - queue_len = skb_queue_len(&offload->skb_queue); - if (queue_len > offload->skb_queue_len_max / 8) - netdev_dbg(offload->dev, "%s: 
queue_len=%d\n", - __func__, queue_len); - - can_rx_offload_schedule(offload); + __skb_queue_add_sort(&offload->skb_irq_queue, skb, + can_rx_offload_compare); + received++; } - return skb_queue_len(&skb_queue); + return received; } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_timestamp); @@ -226,13 +210,10 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) if (!skb) break; - skb_queue_tail(&offload->skb_queue, skb); + __skb_queue_tail(&offload->skb_irq_queue, skb); received++; } - if (received) - can_rx_offload_schedule(offload); - return received; } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); @@ -241,7 +222,6 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp) { struct can_rx_offload_cb *cb; - unsigned long flags; if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { @@ -252,11 +232,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; - spin_lock_irqsave(&offload->skb_queue.lock, flags); - __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare); - spin_unlock_irqrestore(&offload->skb_queue.lock, flags); - - can_rx_offload_schedule(offload); + __skb_queue_add_sort(&offload->skb_irq_queue, skb, + can_rx_offload_compare); return 0; } @@ -295,13 +272,56 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, return -ENOBUFS; } - skb_queue_tail(&offload->skb_queue, skb); - can_rx_offload_schedule(offload); + __skb_queue_tail(&offload->skb_irq_queue, skb); return 0; } EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail); +void can_rx_offload_irq_finish(struct can_rx_offload *offload) +{ + unsigned long flags; + int queue_len; + + if (skb_queue_empty_lockless(&offload->skb_irq_queue)) + return; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + queue_len = skb_queue_len(&offload->skb_queue); + if (queue_len > offload->skb_queue_len_max / 8) + netdev_dbg(offload->dev, "%s: queue_len=%d\n", + __func__, queue_len); + + napi_schedule(&offload->napi); +} +EXPORT_SYMBOL_GPL(can_rx_offload_irq_finish); + +void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload) +{ + unsigned long flags; + int queue_len; + + if (skb_queue_empty_lockless(&offload->skb_irq_queue)) + return; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + queue_len = skb_queue_len(&offload->skb_queue); + if (queue_len > offload->skb_queue_len_max / 8) + netdev_dbg(offload->dev, "%s: queue_len=%d\n", + __func__, queue_len); + + local_bh_disable(); + napi_schedule(&offload->napi); + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(can_rx_offload_threaded_irq_finish); + static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) @@ -312,6 +332,7 @@ static int can_rx_offload_init_queue(struct net_device *dev, offload->skb_queue_len_max = 2 << fls(weight); offload->skb_queue_len_max *= 4; skb_queue_head_init(&offload->skb_queue); + __skb_queue_head_init(&offload->skb_irq_queue); netif_napi_add(dev, &offload->napi, can_rx_offload_napi_poll, weight); @@ -373,5 +394,6 @@ void can_rx_offload_del(struct can_rx_offload *offload) { netif_napi_del(&offload->napi); skb_queue_purge(&offload->skb_queue); + 
__skb_queue_purge(&offload->skb_irq_queue); } EXPORT_SYMBOL_GPL(can_rx_offload_del); diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 57f3635ad8d7..7734229aa078 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -28,6 +28,7 @@ #include <linux/of_device.h> #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> +#include <linux/can/platform/flexcan.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> @@ -208,18 +209,19 @@ /* FLEXCAN hardware feature flags
 *
 * Below is some version info we got:
- * SOC      Version  IP-Version   Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece-   FD Mode
+ * SOC      Version  IP-Version   Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece-   FD Mode    MB
 *                                Filter? connected?  Passive detection  ption in MB Supported?
- * MX25     FlexCAN2 03.00.00.00  no      no          no      no         no          no
- * MX28     FlexCAN2 03.00.04.00  yes     yes         no      no         no          no
- * MX35     FlexCAN2 03.00.00.00  no      no          no      no         no          no
- * MX53     FlexCAN2 03.00.00.00  yes     no          no      no         no          no
- * MX6s     FlexCAN3 10.00.12.00  yes     yes         no      no         yes         no
- * MX8QM    FlexCAN3 03.00.23.00  yes     yes         no      no         yes         yes
- * MX8MP    FlexCAN3 03.00.17.01  yes     yes         no      yes        yes         yes
- * VF610    FlexCAN3 ?            no      yes         no      yes        yes?        no
- * LS1021A  FlexCAN2 03.00.04.00  no      yes         no      no         yes         no
- * LX2160A  FlexCAN3 03.00.23.00  no      yes         no      yes        yes         yes
+ * MCF5441X FlexCAN2 ?            no      yes         no      no         yes         no         16
+ * MX25     FlexCAN2 03.00.00.00  no      no          no      no         no          no         64
+ * MX28     FlexCAN2 03.00.04.00  yes     yes         no      no         no          no         64
+ * MX35     FlexCAN2 03.00.00.00  no      no          no      no         no          no         64
+ * MX53     FlexCAN2 03.00.00.00  yes     no          no      no         no          no         64
+ * MX6s     FlexCAN3 10.00.12.00  yes     yes         no      no         yes         no         64
+ * MX8QM    FlexCAN3 03.00.23.00  yes     yes         no      no         yes         yes        64
+ * MX8MP    FlexCAN3 03.00.17.01  yes     yes         no      yes        yes         yes        64
+ * VF610    FlexCAN3 ?            no      yes         no      yes        yes?        no         64
+ * LS1021A  FlexCAN2 03.00.04.00  no      yes         no      no         yes         no         64
+ * LX2160A  FlexCAN3 03.00.23.00  no      yes         no      yes        yes         yes        64
 *
 * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
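The MB column added to the table above is what the new FLEXCAN_QUIRK_NR_MB_16 bit (defined just below) encodes; here is a hedged sketch of how such a capability bit gates the mailbox-count computation, mirroring the flexcan_rx_offload_setup() hunk later in this patch. Only the quirk flag itself is from the patch; the helper is an editor's illustration.

#include <linux/types.h>

static unsigned int flexcan_mb_count_sketch(u32 quirks,
					    unsigned int computed_count)
{
	if (quirks & FLEXCAN_QUIRK_NR_MB_16)
		return 16;		/* MCF5441X exposes only 16 mailboxes */
	return computed_count;		/* others: derived from mailbox RAM size */
}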
*/ @@ -246,6 +248,10 @@ #define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10) /* Setup stop mode with SCU firmware to support wakeup */ #define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11) +/* Setup 3 separate interrupts, main, boff and err */ +#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12) +/* Setup 16 mailboxes */ +#define FLEXCAN_QUIRK_NR_MB_16 BIT(13) /* Structure of the message buffer */ struct flexcan_mb { @@ -363,6 +369,9 @@ struct flexcan_priv { struct regulator *reg_xceiver; struct flexcan_stop_mode stm; + int irq_boff; + int irq_err; + /* IPC handle when setup stop mode by System Controller firmware(scfw) */ struct imx_sc_ipc *sc_ipc_handle; @@ -371,6 +380,11 @@ struct flexcan_priv { void (*write)(u32 val, void __iomem *addr); }; +static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE | + FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16, +}; + static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | FLEXCAN_QUIRK_BROKEN_PERR_STATE | @@ -635,15 +649,19 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) static int flexcan_clks_enable(const struct flexcan_priv *priv) { - int err; + int err = 0; - err = clk_prepare_enable(priv->clk_ipg); - if (err) - return err; + if (priv->clk_ipg) { + err = clk_prepare_enable(priv->clk_ipg); + if (err) + return err; + } - err = clk_prepare_enable(priv->clk_per); - if (err) - clk_disable_unprepare(priv->clk_ipg); + if (priv->clk_per) { + err = clk_prepare_enable(priv->clk_per); + if (err) + clk_disable_unprepare(priv->clk_ipg); + } return err; } @@ -1198,6 +1216,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) } } + if (handled) + can_rx_offload_irq_finish(&priv->offload); + return handled; } @@ -1401,8 +1422,12 @@ static int flexcan_rx_offload_setup(struct net_device *dev) priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN; else priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN; - priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) + - (sizeof(priv->regs->mb[1]) / priv->mb_size); + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16) + priv->mb_count = 16; + else + priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) + + (sizeof(priv->regs->mb[1]) / priv->mb_size); if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) priv->tx_mb_reserved = @@ -1774,6 +1799,18 @@ static int flexcan_open(struct net_device *dev) if (err) goto out_can_rx_offload_disable; + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + err = request_irq(priv->irq_boff, + flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_free_irq; + + err = request_irq(priv->irq_err, + flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_free_irq_boff; + } + flexcan_chip_interrupts_enable(dev); can_led_event(dev, CAN_LED_EVENT_OPEN); @@ -1782,6 +1819,10 @@ static int flexcan_open(struct net_device *dev) return 0; + out_free_irq_boff: + free_irq(priv->irq_boff, dev); + out_free_irq: + free_irq(dev->irq, dev); out_can_rx_offload_disable: can_rx_offload_disable(&priv->offload); flexcan_chip_stop(dev); @@ -1803,6 +1844,12 @@ static int flexcan_close(struct net_device *dev) netif_stop_queue(dev); flexcan_chip_interrupts_disable(dev); + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + free_irq(priv->irq_err, dev); + free_irq(priv->irq_boff, dev); + } + free_irq(dev->irq, dev); can_rx_offload_disable(&priv->offload); flexcan_chip_stop_disable_on_error(dev); @@ -2039,14 
+2086,26 @@ static const struct of_device_id flexcan_of_match[] = { }; MODULE_DEVICE_TABLE(of, flexcan_of_match); +static const struct platform_device_id flexcan_id_table[] = { + { + .name = "flexcan-mcf5441x", + .driver_data = (kernel_ulong_t)&fsl_mcf5441x_devtype_data, + }, { + /* sentinel */ + }, +}; +MODULE_DEVICE_TABLE(platform, flexcan_id_table); + static int flexcan_probe(struct platform_device *pdev) { + const struct of_device_id *of_id; const struct flexcan_devtype_data *devtype_data; struct net_device *dev; struct flexcan_priv *priv; struct regulator *reg_xceiver; struct clk *clk_ipg = NULL, *clk_per = NULL; struct flexcan_regs __iomem *regs; + struct flexcan_platform_data *pdata; int err, irq; u8 clk_src = 1; u32 clock_freq = 0; @@ -2064,6 +2123,12 @@ static int flexcan_probe(struct platform_device *pdev) "clock-frequency", &clock_freq); of_property_read_u8(pdev->dev.of_node, "fsl,clk-source", &clk_src); + } else { + pdata = dev_get_platdata(&pdev->dev); + if (pdata) { + clock_freq = pdata->clock_frequency; + clk_src = pdata->clk_src; + } } if (!clock_freq) { @@ -2089,7 +2154,14 @@ static int flexcan_probe(struct platform_device *pdev) if (IS_ERR(regs)) return PTR_ERR(regs); - devtype_data = of_device_get_match_data(&pdev->dev); + of_id = of_match_device(flexcan_of_match, &pdev->dev); + if (of_id) + devtype_data = of_id->data; + else if (platform_get_device_id(pdev)->driver_data) + devtype_data = (struct flexcan_devtype_data *) + platform_get_device_id(pdev)->driver_data; + else + return -ENODEV; if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) && !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) { @@ -2133,6 +2205,19 @@ static int flexcan_probe(struct platform_device *pdev) priv->devtype_data = devtype_data; priv->reg_xceiver = reg_xceiver; + if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + priv->irq_boff = platform_get_irq(pdev, 1); + if (priv->irq_boff <= 0) { + err = -ENODEV; + goto failed_platform_get_irq; + } + priv->irq_err = platform_get_irq(pdev, 2); + if (priv->irq_err <= 0) { + err = -ENODEV; + goto failed_platform_get_irq; + } + } + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) { priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; @@ -2170,6 +2255,7 @@ static int flexcan_probe(struct platform_device *pdev) failed_register: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + failed_platform_get_irq: free_candev(dev); return err; } @@ -2322,6 +2408,7 @@ static struct platform_driver flexcan_driver = { }, .probe = flexcan_probe, .remove = flexcan_remove, + .id_table = flexcan_id_table, }; module_platform_driver(flexcan_driver); diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 2a6c918186c0..c68ad56628bd 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1815,9 +1815,9 @@ static int ican3_get_berr_counter(const struct net_device *ndev, * Sysfs Attributes */ -static ssize_t ican3_sysfs_show_term(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t termination_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); int ret; @@ -1834,9 +1834,9 @@ static ssize_t ican3_sysfs_show_term(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", mod->termination_enabled); } -static ssize_t ican3_sysfs_set_term(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t termination_store(struct device *dev, + 
struct device_attribute *attr, + const char *buf, size_t count) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); unsigned long enable; @@ -1852,18 +1852,17 @@ static ssize_t ican3_sysfs_set_term(struct device *dev, return count; } -static ssize_t ican3_sysfs_show_fwinfo(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t fwinfo_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo); } -static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term, - ican3_sysfs_set_term); -static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL); +static DEVICE_ATTR_RW(termination); +static DEVICE_ATTR_RO(fwinfo); static struct attribute *ican3_sysfs_attrs[] = { &dev_attr_termination.attr, diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index bba2a449ac70..0cffaad905c2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -21,6 +21,7 @@ #include <linux/iopoll.h> #include <linux/can/dev.h> #include <linux/pinctrl/consumer.h> +#include <linux/phy/phy.h> #include "m_can.h" @@ -1058,6 +1059,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } } + if (cdev->is_peripheral) + can_rx_offload_threaded_irq_finish(&cdev->offload); + return IRQ_HANDLED; } @@ -1436,32 +1440,20 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) case 30: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_30X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_30X; + cdev->can.bittiming_const = &m_can_bittiming_const_30X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X; break; case 31: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; break; case 32: case 33: /* Support both MCAN version v3.2.x and v3.3.0 */ - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? 
@@ -1518,6 +1510,8 @@ static int m_can_close(struct net_device *dev) close_candev(dev); can_led_event(dev, CAN_LED_EVENT_STOP); + phy_power_off(cdev->transceiver); + return 0; } @@ -1703,10 +1697,14 @@ static int m_can_open(struct net_device *dev) struct m_can_classdev *cdev = netdev_priv(dev); int err; - err = m_can_clk_start(cdev); + err = phy_power_on(cdev->transceiver); if (err) return err; + err = m_can_clk_start(cdev); + if (err) + goto out_phy_power_off; + /* open the can device */ err = open_candev(dev); if (err) { @@ -1763,6 +1761,8 @@ out_wq_fail: close_candev(dev); exit_disable_clks: m_can_clk_stop(cdev); +out_phy_power_off: + phy_power_off(cdev->transceiver); return err; } diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index ace071c3e58c..56e994376a7b 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -28,6 +28,7 @@ #include <linux/iopoll.h> #include <linux/can/dev.h> #include <linux/pinctrl/consumer.h> +#include <linux/phy/phy.h> /* m_can lec values */ enum m_can_lec_type { @@ -82,9 +83,7 @@ struct m_can_classdev { struct workqueue_struct *tx_wq; struct work_struct tx_work; struct sk_buff *tx_skb; - - struct can_bittiming_const *bit_timing; - struct can_bittiming_const *data_timing; + struct phy *transceiver; struct m_can_ops *ops; diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 599de0e08cd7..a28c84aa8fa8 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -6,6 +6,7 @@ // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ #include <linux/platform_device.h> +#include <linux/phy/phy.h> #include "m_can.h" @@ -67,6 +68,7 @@ static int m_can_plat_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr; void __iomem *mram_addr; + struct phy *transceiver; int irq, ret = 0; mcan_class = m_can_class_allocate_dev(&pdev->dev, @@ -80,8 +82,7 @@ static int m_can_plat_probe(struct platform_device *pdev) if (ret) goto probe_fail; - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can"); - addr = devm_ioremap_resource(&pdev->dev, res); + addr = devm_platform_ioremap_resource_byname(pdev, "m_can"); irq = platform_get_irq_byname(pdev, "int0"); if (IS_ERR(addr) || irq < 0) { ret = -EINVAL; @@ -101,6 +102,16 @@ static int m_can_plat_probe(struct platform_device *pdev) goto probe_fail; } + transceiver = devm_phy_optional_get(&pdev->dev, NULL); + if (IS_ERR(transceiver)) { + ret = PTR_ERR(transceiver); + dev_err_probe(&pdev->dev, ret, "failed to get phy\n"); + goto probe_fail; + } + + if (transceiver) + mcan_class->can.bitrate_max = transceiver->attrs.max_link_rate; + priv->base = addr; priv->mram_base = mram_addr; @@ -108,6 +119,7 @@ static int m_can_plat_probe(struct platform_device *pdev) mcan_class->pm_clock_support = 1; mcan_class->can.clock.freq = clk_get_rate(mcan_class->cclk); mcan_class->dev = &pdev->dev; + mcan_class->transceiver = transceiver; mcan_class->ops = &m_can_plat_ops; diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 84eac8cb8686..6db90dc4bc9d 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -28,6 +28,10 @@ MODULE_LICENSE("GPL v2"); #define DRV_NAME "peak_pci" +/* FPGA cards FW version registers */ +#define PEAK_VER_REG1 0x40 +#define PEAK_VER_REG2 0x44 + struct peak_pciec_card; struct peak_pci_chan { void __iomem *cfg_base; /* Common for all channels */ @@ -41,9 +45,7 @@ struct 
peak_pci_chan { #define PEAK_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK) #define PEAK_PCI_OCR OCR_TX0_PUSHPULL -/* - * Important PITA registers - */ +/* Important PITA registers */ #define PITA_ICR 0x00 /* Interrupt control register */ #define PITA_GPIOICR 0x18 /* GPIO interface control register */ #define PITA_MISC 0x1C /* Miscellaneous register */ @@ -70,27 +72,47 @@ static const u16 peak_pci_icr_masks[PEAK_PCI_CHAN_MAX] = { }; static const struct pci_device_id peak_pci_tbl[] = { - {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, + { + PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI Express", + }, { + PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-miniPCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-miniPCIe", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PC/104-Plus Quad", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI/104-Express", + }, { + PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-cPCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-Chip PCIe", + }, #ifdef CONFIG_CAN_PEAK_PCIEC - {PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + { + PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-ExpressCard", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-ExpressCard 34", + }, #endif - {0,} + { /* sentinel */ } }; MODULE_DEVICE_TABLE(pci, peak_pci_tbl); #ifdef CONFIG_CAN_PEAK_PCIEC -/* - * PCAN-ExpressCard needs I2C bit-banging configuration option. - */ +/* PCAN-ExpressCard needs I2C bit-banging configuration option. */ /* GPIOICR byte access offsets */ #define PITA_GPOUT 0x18 /* GPx output value */ @@ -156,12 +178,14 @@ static void peak_pci_write_reg(const struct sja1000_priv *priv, static inline void pita_set_scl_highz(struct peak_pciec_card *card) { u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SCL; + writeb(gp_outen, card->cfg_base + PITA_GPOEN); } static inline void pita_set_sda_highz(struct peak_pciec_card *card) { u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SDA; + writeb(gp_outen, card->cfg_base + PITA_GPOEN); } @@ -230,9 +254,7 @@ static int pita_getscl(void *data) return (readb(card->cfg_base + PITA_GPIN) & PITA_GPIN_SCL) ? 
1 : 0; } -/* - * write commands to the LED chip though the I2C-bus of the PCAN-PCIeC - */ +/* write commands to the LED chip through the I2C-bus of the PCAN-PCIeC */ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, u8 offset, u8 data) { @@ -248,7 +270,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, int ret; /* cache led mask */ - if ((offset == 5) && (data == card->led_cache)) + if (offset == 5 && data == card->led_cache) return 0; ret = i2c_transfer(&card->led_chip, &msg, 1); @@ -261,9 +283,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, return 0; } -/* - * delayed work callback used to control the LEDs - */ +/* delayed work callback used to control the LEDs */ static void peak_pciec_led_work(struct work_struct *work) { struct peak_pciec_card *card = @@ -309,9 +329,7 @@ static void peak_pciec_led_work(struct work_struct *work) schedule_delayed_work(&card->led_work, HZ); } -/* - * set LEDs blinking state - */ +/* set LEDs blinking state */ static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s) { u8 new_led = card->led_cache; @@ -328,25 +346,19 @@ static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s) peak_pciec_write_pca9553(card, 5, new_led); } -/* - * start one second delayed work to control LEDs - */ +/* start one second delayed work to control LEDs */ static void peak_pciec_start_led_work(struct peak_pciec_card *card) { schedule_delayed_work(&card->led_work, HZ); } -/* - * stop LEDs delayed work - */ +/* stop LEDs delayed work */ static void peak_pciec_stop_led_work(struct peak_pciec_card *card) { cancel_delayed_work_sync(&card->led_work); } -/* - * initialize the PCA9553 4-bit I2C-bus LED chip - */ +/* initialize the PCA9553 4-bit I2C-bus LED chip */ static int peak_pciec_init_leds(struct peak_pciec_card *card) { int err; @@ -375,17 +387,14 @@ static int peak_pciec_init_leds(struct peak_pciec_card *card) return peak_pciec_write_pca9553(card, 5, PCA9553_LS0_INIT); } -/* - * restore LEDs state to off peak_pciec_leds_exit - */ +/* restore LEDs state to off in peak_pciec_leds_exit() */ static void peak_pciec_leds_exit(struct peak_pciec_card *card) { /* switch LEDs to off */ peak_pciec_write_pca9553(card, 5, PCA9553_LED_OFF_ALL); } -/* - * normal write sja1000 register method overloaded to catch when controller +/* normal write sja1000 register method overloaded to catch when controller * is started or stopped, to control leds */ static void peak_pciec_write_reg(const struct sja1000_priv *priv, @@ -443,7 +452,7 @@ static int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev) /* channel is the first one: do the init part */ } else { /* create the bit banging I2C adapter structure */ - card = kzalloc(sizeof(struct peak_pciec_card), GFP_KERNEL); + card = kzalloc(sizeof(*card), GFP_KERNEL); if (!card) return -ENOMEM; @@ -506,9 +515,7 @@ static void peak_pciec_remove(struct peak_pciec_card *card) #else /* CONFIG_CAN_PEAK_PCIEC */ -/* - * Placebo functions when PCAN-ExpressCard support is not selected - */ +/* Placebo functions when PCAN-ExpressCard support is not selected */ static inline int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev) { return -ENODEV; } @@ -549,6 +556,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) void __iomem *cfg_base, *reg_base; u16 sub_sys_id, icr; int i, err, channels; + char fw_str[14] = ""; err = pci_enable_device(pdev); if (err) @@ -602,6 +610,21 @@ static int peak_pci_probe(struct pci_dev
*pdev, const struct pci_device_id *ent) /* Leave parport mux mode */ writeb(0x04, cfg_base + PITA_MISC + 3); + /* FPGA-equipped card if this register reads non-zero */ + if (readl(cfg_base + PEAK_VER_REG1)) { + /* FPGA card: display version of the running firmware */ + u32 fw_ver = readl(cfg_base + PEAK_VER_REG2); + + snprintf(fw_str, sizeof(fw_str), " FW v%u.%u.%u", + (fw_ver >> 12) & 0xf, + (fw_ver >> 8) & 0xf, + (fw_ver >> 4) & 0xf); + } + + /* Display commercial name (and, if applicable, FW version) of the card */ + dev_info(&pdev->dev, "%ux CAN %s%s\n", + channels, (const char *)ent->driver_data, fw_str); + icr = readw(cfg_base + PITA_ICR + 2); for (i = 0; i < channels; i++) { @@ -642,8 +665,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) chan->prev_dev = pci_get_drvdata(pdev); pci_set_drvdata(pdev, dev); - /* - * PCAN-ExpressCard needs some additional i2c init. + /* PCAN-ExpressCard needs some additional i2c init. * This must be done *before* register_sja1000dev() but * *after* devices linkage */ @@ -709,7 +731,8 @@ failure_disable_pci: /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while * the probe() function must return a negative errno in case of failure - * (err is unchanged if negative) */ + * (err is unchanged if negative) + */ return pcibios_err_to_errno(err); } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 9ae48072b6c6..6c369a399c45 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -15,10 +15,10 @@ #include <linux/bitfield.h> #include <linux/clk.h> #include <linux/device.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> -#include <linux/of.h> -#include <linux/of_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <asm/unaligned.h> @@ -2195,8 +2195,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) FIELD_GET(MCP251XFD_REG_INT_IE_MASK, priv->regs_status.intf); - if (!(intf_pending)) + if (!(intf_pending)) { + can_rx_offload_threaded_irq_finish(&priv->offload); return handled; + } /* Some interrupts must be ACKed in the * MCP251XFD_REG_INT register.
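
The peak_pci probe hunk above derives a human-readable firmware version from PEAK_VER_REG2 by pulling out three 4-bit nibbles. A minimal user-space sketch of the same decoding; the register value and banner text are made up for illustration, only the shift/mask layout and the 14-byte buffer come from the hunk:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* hypothetical PEAK_VER_REG2 content; in the driver this
		 * value comes from readl() on the card's config space
		 */
		unsigned int fw_ver = 0x00003450;
		char fw_str[14] = "";

		/* major.minor.sub live in bits 15..4, one nibble each */
		snprintf(fw_str, sizeof(fw_str), " FW v%u.%u.%u",
			 (fw_ver >> 12) & 0xf,
			 (fw_ver >> 8) & 0xf,
			 (fw_ver >> 4) & 0xf);

		/* prints: 2x CAN PCAN-PCI Express FW v3.4.5 */
		printf("2x CAN PCAN-PCI Express%s\n", fw_str);
		return 0;
	}
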
@@ -2296,6 +2298,8 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) } while (1); out_fail: + can_rx_offload_threaded_irq_finish(&priv->offload); + netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n", err, priv->regs_status.intf); mcp251xfd_dump(priv); @@ -2524,8 +2528,8 @@ static int mcp251xfd_open(struct net_device *ndev) can_rx_offload_enable(&priv->offload); err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq, - IRQF_ONESHOT, dev_name(&spi->dev), - priv); + IRQF_SHARED | IRQF_ONESHOT, + dev_name(&spi->dev), priv); if (err) goto out_can_rx_offload_disable; @@ -2857,7 +2861,7 @@ static int mcp251xfd_probe(struct spi_device *spi) struct gpio_desc *rx_int; struct regulator *reg_vdd, *reg_xceiver; struct clk *clk; - u32 freq; + u32 freq = 0; int err; if (!spi->irq) @@ -2884,11 +2888,19 @@ static int mcp251xfd_probe(struct spi_device *spi) return dev_err_probe(&spi->dev, PTR_ERR(reg_xceiver), "Failed to get Transceiver regulator!\n"); - clk = devm_clk_get(&spi->dev, NULL); + clk = devm_clk_get_optional(&spi->dev, NULL); if (IS_ERR(clk)) return dev_err_probe(&spi->dev, PTR_ERR(clk), "Failed to get Oscillator (clock)!\n"); - freq = clk_get_rate(clk); + if (clk) { + freq = clk_get_rate(clk); + } else { + err = device_property_read_u32(&spi->dev, "clock-frequency", + &freq); + if (err) + return dev_err_probe(&spi->dev, err, + "Failed to get clock-frequency!\n"); + } /* Sanity check */ if (freq < MCP251XFD_SYSCLOCK_HZ_MIN || diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 73245d8836a9..353062ead98f 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -786,6 +786,8 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) int_status = hecc_read(priv, HECC_CANGIF0); } + can_rx_offload_irq_finish(&priv->offload); + return IRQ_HANDLED; } diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 66fa8b07c2e6..7370981e9b34 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -476,7 +476,7 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) netif_trans_update(netdev); } -static ssize_t show_firmware(struct device *d, +static ssize_t firmware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -487,9 +487,9 @@ static ssize_t show_firmware(struct device *d, (dev->version >> 8) & 0xf, dev->version & 0xff); } -static DEVICE_ATTR(firmware, 0444, show_firmware, NULL); +static DEVICE_ATTR_RO(firmware); -static ssize_t show_hardware(struct device *d, +static ssize_t hardware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -500,9 +500,9 @@ static ssize_t show_hardware(struct device *d, (dev->version >> 24) & 0xf, (dev->version >> 16) & 0xff); } -static DEVICE_ATTR(hardware, 0444, show_hardware, NULL); +static DEVICE_ATTR_RO(hardware); -static ssize_t show_nets(struct device *d, +static ssize_t nets_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -510,7 +510,7 @@ static ssize_t show_nets(struct device *d, return sprintf(buf, "%d", dev->net_count); } -static DEVICE_ATTR(nets, 0444, show_nets, NULL); +static DEVICE_ATTR_RO(nets); static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg) { diff --git a/drivers/net/can/usb/etas_es58x/es581_4.c b/drivers/net/can/usb/etas_es58x/es581_4.c index 1985f772fc3c..14e360c9f2c9 100644 --- 
a/drivers/net/can/usb/etas_es58x/es581_4.c +++ b/drivers/net/can/usb/etas_es58x/es581_4.c @@ -355,7 +355,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv, return -EMSGSIZE; if (priv->tx_can_msg_cnt == 0) { - msg_len = 1; /* struct es581_4_bulk_tx_can_msg:num_can_msg */ + msg_len = sizeof(es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg); es581_4_fill_urb_header(urb_cmd, ES581_4_CAN_COMMAND_TYPE, ES581_4_CMD_ID_TX_MSG, priv->channel_idx, msg_len); @@ -371,8 +371,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv, return ret; /* Fill message contents. */ - tx_can_msg = (struct es581_4_tx_can_msg *) - &es581_4_urb_cmd->bulk_tx_can_msg.tx_can_msg_buf[msg_len - 1]; + tx_can_msg = (typeof(tx_can_msg))&es581_4_urb_cmd->raw_msg[msg_len]; put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id); put_unaligned_le32(priv->tx_head, &tx_can_msg->packet_idx); put_unaligned_le16((u16)es58x_get_flags(skb), &tx_can_msg->flags); diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 8e9102482c52..96a13c770e4a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -19,7 +19,7 @@ #include "es58x_core.h" #define DRV_VERSION "1.00" -MODULE_AUTHOR("Mailhol Vincent <mailhol.vincent@wanadoo.fr>"); +MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>"); MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>"); MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters"); MODULE_VERSION(DRV_VERSION); @@ -70,7 +70,7 @@ MODULE_DEVICE_TABLE(usb, es58x_id_table); * bytes (the start of frame) are skipped and the CRC calculation * starts on the third byte. */ -#define ES58X_CRC_CALC_OFFSET 2 +#define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof) /** * es58x_calculate_crc() - Compute the crc16 of a given URB. @@ -2108,6 +2108,25 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) } /** + * es58x_free_netdevs() - Release all network resources of the device. + * @es58x_dev: ES58X device. + */ +static void es58x_free_netdevs(struct es58x_device *es58x_dev) +{ + int i; + + for (i = 0; i < es58x_dev->num_can_ch; i++) { + struct net_device *netdev = es58x_dev->netdev[i]; + + if (!netdev) + continue; + unregister_candev(netdev); + es58x_dev->netdev[i] = NULL; + free_candev(netdev); + } +} + +/** * es58x_get_product_info() - Get the product information and print them. * @es58x_dev: ES58X device. * @@ -2152,14 +2171,13 @@ static int es58x_get_product_info(struct es58x_device *es58x_dev) /** * es58x_init_es58x_dev() - Initialize the ES58X device. * @intf: USB interface. - * @p_es58x_dev: pointer to the address of the ES58X device. * @driver_info: Quirks of the device. * - * Return: zero on success, errno when any error occurs. + * Return: pointer to an ES58X device on success, error pointer when + * any error occurs. 
*/ -static int es58x_init_es58x_dev(struct usb_interface *intf, - struct es58x_device **p_es58x_dev, - kernel_ulong_t driver_info) +static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, + kernel_ulong_t driver_info) { struct device *dev = &intf->dev; struct es58x_device *es58x_dev; @@ -2176,7 +2194,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, NULL, NULL); if (ret) - return ret; + return ERR_PTR(ret); if (driver_info & ES58X_FD_FAMILY) { param = &es58x_fd_param; @@ -2186,9 +2204,10 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ops = &es581_4_ops; } - es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL); + es58x_dev = devm_kzalloc(dev, es58x_sizeof_es58x_device(param), + GFP_KERNEL); if (!es58x_dev) - return -ENOMEM; + return ERR_PTR(-ENOMEM); es58x_dev->param = param; es58x_dev->ops = ops; @@ -2213,9 +2232,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ep_out->bEndpointAddress); es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize); - *p_es58x_dev = es58x_dev; - - return 0; + return es58x_dev; } /** @@ -2232,30 +2249,21 @@ static int es58x_probe(struct usb_interface *intf, struct es58x_device *es58x_dev; int ch_idx, ret; - ret = es58x_init_es58x_dev(intf, &es58x_dev, id->driver_info); - if (ret) - return ret; + es58x_dev = es58x_init_es58x_dev(intf, id->driver_info); + if (IS_ERR(es58x_dev)) + return PTR_ERR(es58x_dev); ret = es58x_get_product_info(es58x_dev); if (ret) - goto cleanup_es58x_dev; + return ret; for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) { ret = es58x_init_netdev(es58x_dev, ch_idx); - if (ret) - goto cleanup_candev; - } - - return ret; - - cleanup_candev: - for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) - if (es58x_dev->netdev[ch_idx]) { - unregister_candev(es58x_dev->netdev[ch_idx]); - free_candev(es58x_dev->netdev[ch_idx]); + if (ret) { + es58x_free_netdevs(es58x_dev); + return ret; } - cleanup_es58x_dev: - kfree(es58x_dev); + } return ret; } @@ -2270,24 +2278,12 @@ static int es58x_probe(struct usb_interface *intf, static void es58x_disconnect(struct usb_interface *intf) { struct es58x_device *es58x_dev = usb_get_intfdata(intf); - struct net_device *netdev; - int i; dev_info(&intf->dev, "Disconnecting %s %s\n", es58x_dev->udev->manufacturer, es58x_dev->udev->product); - for (i = 0; i < es58x_dev->num_can_ch; i++) { - netdev = es58x_dev->netdev[i]; - if (!netdev) - continue; - unregister_candev(netdev); - es58x_dev->netdev[i] = NULL; - free_candev(netdev); - } - + es58x_free_netdevs(es58x_dev); es58x_free_urbs(es58x_dev); - - kfree(es58x_dev); usb_set_intfdata(intf, NULL); } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h index fcf219e727bf..826a15871573 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.h +++ b/drivers/net/can/usb/etas_es58x/es58x_core.h @@ -287,7 +287,7 @@ struct es58x_priv { * @rx_urb_cmd_max_len: Maximum length of a RX URB command. * @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head * field of the struct es58x_priv into echo_skb - * indexes. Properties: @fifo_mask = echos_skb_max - 1 where + * indexes. Properties: @fifo_mask = echo_skb_max - 1 where * echo_skb_max must be a power of two. Also, echo_skb_max must * not exceed the maximum size of the device internal TX FIFO * length. 
This parameter is used to control the network queue diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c index 1a2779d383a4..af042aa55f59 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.c +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c @@ -357,8 +357,7 @@ static int es58x_fd_tx_can_msg(struct es58x_priv *priv, return ret; /* Fill message contents. */ - tx_can_msg = (struct es58x_fd_tx_can_msg *) - &es58x_fd_urb_cmd->tx_can_msg_buf[msg_len]; + tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len]; tx_can_msg->packet_idx = (u8)priv->tx_head; put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id); tx_can_msg->flags = (u8)es58x_get_flags(skb); @@ -463,9 +462,9 @@ static int es58x_fd_get_timestamp(struct es58x_device *es58x_dev) } /* Nominal bittiming constants for ES582.1 and ES584.1 as specified in - * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family" - * section 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" - * from Microchip. + * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section + * 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" from + * Microchip. * * The values from the specification are the hardware register * values. To convert them to the functional values, all ranges were @@ -484,8 +483,8 @@ static const struct can_bittiming_const es58x_fd_nom_bittiming_const = { }; /* Data bittiming constants for ES582.1 and ES584.1 as specified in - * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family" - * section 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from + * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section + * 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from * Microchip. */ static const struct can_bittiming_const es58x_fd_data_bittiming_const = { @@ -501,9 +500,9 @@ static const struct can_bittiming_const es58x_fd_data_bittiming_const = { }; /* Transmission Delay Compensation constants for ES582.1 and ES584.1 - * as specified in the microcontroller datasheet: "SAM - * E701/S70/V70/V71 Family" section 49.6.15 "MCAN Transmitter Delay - * Compensation Register" from Microchip. + * as specified in the microcontroller datasheet: "SAM E70/S70/V70/V71 + * Family" section 49.6.15 "MCAN Transmitter Delay Compensation + * Register" from Microchip. */ static const struct can_tdc_const es58x_tdc_const = { .tdcv_max = 0, /* Manual mode not supported. 
*/ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 899a3d21b77f..837b3fecd71e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -63,6 +63,8 @@ #define PCAN_USB_MSG_HEADER_LEN 2 +#define PCAN_USB_MSG_TX_CAN 2 /* Tx msg is a CAN frame */ + /* PCAN-USB adapter internal clock (MHz) */ #define PCAN_USB_CRYSTAL_HZ 16000000 @@ -73,6 +75,10 @@ #define PCAN_USB_STATUSLEN_RTR (1 << 4) #define PCAN_USB_STATUSLEN_DLC (0xf) +/* PCAN-USB 4.1 CAN Id tx extended flags */ +#define PCAN_USB_TX_SRR 0x01 /* SJA1000 SRR command */ +#define PCAN_USB_TX_AT 0x02 /* SJA1000 AT command */ + /* PCAN-USB error flags */ #define PCAN_USB_ERROR_TXFULL 0x01 #define PCAN_USB_ERROR_RXQOVR 0x02 @@ -385,7 +391,8 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id) if (err) netdev_err(dev->netdev, "getting device id failure: %d\n", err); - *device_id = args[0]; + else + *device_id = args[0]; return err; } @@ -446,145 +453,65 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, { struct sk_buff *skb; struct can_frame *cf; - enum can_state new_state; + enum can_state new_state = CAN_STATE_ERROR_ACTIVE; /* ignore this error until 1st ts received */ if (n == PCAN_USB_ERROR_QOVR) if (!mc->pdev->time_ref.tick_count) return 0; - new_state = mc->pdev->dev.can.state; - - switch (mc->pdev->dev.can.state) { - case CAN_STATE_ERROR_ACTIVE: - if (n & PCAN_USB_ERROR_BUS_LIGHT) { - new_state = CAN_STATE_ERROR_WARNING; - break; - } - fallthrough; - - case CAN_STATE_ERROR_WARNING: - if (n & PCAN_USB_ERROR_BUS_HEAVY) { - new_state = CAN_STATE_ERROR_PASSIVE; - break; - } - if (n & PCAN_USB_ERROR_BUS_OFF) { - new_state = CAN_STATE_BUS_OFF; - break; - } - if (n & ~PCAN_USB_ERROR_BUS) { - /* - * trick to bypass next comparison and process other - * errors - */ - new_state = CAN_STATE_MAX; - break; - } - if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { - /* no error (back to active state) */ - new_state = CAN_STATE_ERROR_ACTIVE; - break; - } - break; - - case CAN_STATE_ERROR_PASSIVE: - if (n & PCAN_USB_ERROR_BUS_OFF) { - new_state = CAN_STATE_BUS_OFF; - break; - } - if (n & PCAN_USB_ERROR_BUS_LIGHT) { - new_state = CAN_STATE_ERROR_WARNING; - break; - } - if (n & ~PCAN_USB_ERROR_BUS) { - /* - * trick to bypass next comparison and process other - * errors - */ - new_state = CAN_STATE_MAX; - break; - } - - if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to warning state) */ - new_state = CAN_STATE_ERROR_WARNING; - break; - } - break; - - default: - /* do nothing waiting for restart */ - return 0; - } - - /* donot post any error if current state didn't change */ - if (mc->pdev->dev.can.state == new_state) - return 0; - /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(mc->netdev, &cf); - if (!skb) - return -ENOMEM; - - switch (new_state) { - case CAN_STATE_BUS_OFF: - cf->can_id |= CAN_ERR_BUSOFF; - mc->pdev->dev.can.can_stats.bus_off++; - can_bus_off(mc->netdev); - break; - - case CAN_STATE_ERROR_PASSIVE: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ? - CAN_ERR_CRTL_TX_PASSIVE : - CAN_ERR_CRTL_RX_PASSIVE; - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; - - mc->pdev->dev.can.can_stats.error_passive++; - break; - - case CAN_STATE_ERROR_WARNING: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ? 
- CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; - - mc->pdev->dev.can.can_stats.error_warning++; - break; - case CAN_STATE_ERROR_ACTIVE: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = CAN_ERR_CRTL_ACTIVE; - - /* sync local copies of rxerr/txerr counters */ - mc->pdev->bec.txerr = 0; - mc->pdev->bec.rxerr = 0; - break; - - default: - /* CAN_STATE_MAX (trick to handle other errors) */ - if (n & PCAN_USB_ERROR_TXQFULL) - netdev_dbg(mc->netdev, "device Tx queue full)\n"); - - if (n & PCAN_USB_ERROR_RXQOVR) { - netdev_dbg(mc->netdev, "data overrun interrupt\n"); + if (n & PCAN_USB_ERROR_RXQOVR) { + /* data overrun interrupt */ + netdev_dbg(mc->netdev, "data overrun interrupt\n"); + mc->netdev->stats.rx_over_errors++; + mc->netdev->stats.rx_errors++; + if (cf) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; - mc->netdev->stats.rx_over_errors++; - mc->netdev->stats.rx_errors++; } + } - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; + if (n & PCAN_USB_ERROR_TXQFULL) + netdev_dbg(mc->netdev, "device Tx queue full\n"); - new_state = mc->pdev->dev.can.state; - break; + if (n & PCAN_USB_ERROR_BUS_OFF) { + new_state = CAN_STATE_BUS_OFF; + } else if (n & PCAN_USB_ERROR_BUS_HEAVY) { + new_state = ((mc->pdev->bec.txerr >= 128) || + (mc->pdev->bec.rxerr >= 128)) ? + CAN_STATE_ERROR_PASSIVE : + CAN_STATE_ERROR_WARNING; + } else { + new_state = CAN_STATE_ERROR_ACTIVE; } - mc->pdev->dev.can.state = new_state; + /* handle change of state */ + if (new_state != mc->pdev->dev.can.state) { + enum can_state tx_state = + (mc->pdev->bec.txerr >= mc->pdev->bec.rxerr) ? + new_state : 0; + enum can_state rx_state = + (mc->pdev->bec.txerr <= mc->pdev->bec.rxerr) ? + new_state : 0; + + can_change_state(mc->netdev, cf, tx_state, rx_state); + + if (new_state == CAN_STATE_BUS_OFF) { + can_bus_off(mc->netdev); + } else if (cf && (cf->can_id & CAN_ERR_CRTL)) { + /* Supply TX/RX error counters in case of + * controller error.
+ */ + cf->data[6] = mc->pdev->bec.txerr; + cf->data[7] = mc->pdev->bec.rxerr; + } + } + + if (!skb) + return -ENOMEM; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb); @@ -706,6 +633,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) struct sk_buff *skb; struct can_frame *cf; struct skb_shared_hwtstamps *hwts; + u32 can_id_flags; skb = alloc_can_skb(mc->netdev, &cf); if (!skb) @@ -715,13 +643,15 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) if ((mc->ptr + 4) > mc->end) goto decode_failed; - cf->can_id = get_unaligned_le32(mc->ptr) >> 3 | CAN_EFF_FLAG; + can_id_flags = get_unaligned_le32(mc->ptr); + cf->can_id = can_id_flags >> 3 | CAN_EFF_FLAG; mc->ptr += 4; } else { if ((mc->ptr + 2) > mc->end) goto decode_failed; - cf->can_id = get_unaligned_le16(mc->ptr) >> 5; + can_id_flags = get_unaligned_le16(mc->ptr); + cf->can_id = can_id_flags >> 5; mc->ptr += 2; } @@ -744,6 +674,10 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) memcpy(cf->data, mc->ptr, cf->len); mc->ptr += rec_len; + + /* Ignore next byte (client private id) if SRR bit is set */ + if (can_id_flags & PCAN_USB_TX_SRR) + mc->ptr++; } /* convert timestamp into kernel time */ @@ -821,10 +755,11 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, struct net_device *netdev = dev->netdev; struct net_device_stats *stats = &netdev->stats; struct can_frame *cf = (struct can_frame *)skb->data; + u32 can_id_flags = cf->can_id & CAN_ERR_MASK; u8 *pc; - obuf[0] = 2; - obuf[1] = 1; + obuf[0] = PCAN_USB_MSG_TX_CAN; + obuf[1] = 1; /* only one CAN frame is stored in the packet */ pc = obuf + PCAN_USB_MSG_HEADER_LEN; @@ -839,12 +774,28 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, *pc |= PCAN_USB_STATUSLEN_EXT_ID; pc++; - put_unaligned_le32((cf->can_id & CAN_ERR_MASK) << 3, pc); + can_id_flags <<= 3; + + if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) + can_id_flags |= PCAN_USB_TX_SRR; + + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + can_id_flags |= PCAN_USB_TX_AT; + + put_unaligned_le32(can_id_flags, pc); pc += 4; } else { pc++; - put_unaligned_le16((cf->can_id & CAN_ERR_MASK) << 5, pc); + can_id_flags <<= 5; + + if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) + can_id_flags |= PCAN_USB_TX_SRR; + + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + can_id_flags |= PCAN_USB_TX_AT; + + put_unaligned_le16(can_id_flags, pc); pc += 2; } @@ -854,6 +805,10 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, pc += cf->len; } + /* SRR bit needs a writer id (useless here) */ + if (can_id_flags & PCAN_USB_TX_SRR) + *pc++ = 0x80; + obuf[(*size)-1] = (u8)(stats->tx_packets & 0xff); return 0; @@ -928,6 +883,19 @@ static int pcan_usb_init(struct peak_usb_device *dev) return err; } + /* Since rev 4.1, PCAN-USB is able to make single-shot as well as + * looped back frames. + */ + if (dev->device_rev >= 41) { + struct can_priv *priv = netdev_priv(dev->netdev); + + priv->ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT | + CAN_CTRLMODE_LOOPBACK; + } else { + dev_info(dev->netdev->dev.parent, + "Firmware update available. 
Please contact support@peak-system.com\n"); + } + dev_info(dev->netdev->dev.parent, "PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n", pcan_usb.name, dev->device_rev, serial_number, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 69f21b71614c..385e169080d9 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,8 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; - if (vid > 1) - reg[1] |= ATA2_IVL; + reg[1] |= ATA2_IVL; + reg[1] |= ATA2_FID(FID_BRIDGED); reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't @@ -1021,6 +1021,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) mt7530_write(priv, MT7530_PCR_P(port), PCR_MATRIX(dsa_user_ports(priv->ds))); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + return 0; } @@ -1143,7 +1147,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state) break; } - mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state); + mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED), + FID_PST(FID_BRIDGED, stp_state)); } static int @@ -1229,6 +1234,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap)); priv->ports[port].pm |= PCR_MATRIX(port_bitmap); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + mutex_unlock(&priv->reg_mutex); return 0; @@ -1241,16 +1250,21 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) bool all_user_ports_removed = true; int i; - /* When a port is removed from the bridge, the port would be set up - * back to the default as is at initial boot which is a VLAN-unaware - * port. + /* This is called after .port_bridge_leave when leaving a VLAN-aware + * bridge. Don't set standalone ports to fallback mode. */ - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_MATRIX_MODE); + if (dsa_to_port(ds, port)->bridge_dev) + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, VLAN_ATTR(MT7530_VLAN_TRANSPARENT) | PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + /* Set PVID to 0 */ + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); + for (i = 0; i < MT7530_NUM_PORTS; i++) { if (dsa_is_user_port(ds, i) && dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) { @@ -1276,15 +1290,14 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) struct mt7530_priv *priv = ds->priv; /* Trapped into security mode allows packet forwarding through VLAN - * table lookup. CPU port is set to fallback mode to let untagged - * frames pass through. + * table lookup. */ - if (dsa_is_cpu_port(ds, port)) - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_FALLBACK_MODE); - else + if (dsa_is_user_port(ds, port)) { mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_SECURITY_MODE); + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID(priv->ports[port].pvid)); + } /* Set the port as a user port which is to be able to recognize VID * from incoming packets before fetching entry within the VLAN table. 
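
Nearly every change in these mt7530 hunks goes through the driver's read-modify-write helper, mt7530_rmw(priv, reg, mask, set): clear the field selected by the mask, then OR in the new value. A self-contained sketch of that idiom, reusing the FID_PST packing from the mt7530.h changes below (two STP-state bits per filter ID); the register file, register index, and the forwarding-state encoding are assumptions for illustration:

	#include <stdint.h>
	#include <stdio.h>

	/* mirrors the reworked FID_PST()/FID_PST_MASK() macros */
	#define FID_PST(fid, state)	(((state) & 0x3) << ((fid) * 2))
	#define FID_PST_MASK(fid)	FID_PST(fid, 0x3)

	static uint32_t regs[16];	/* stand-in for the switch's MMIO space */

	static void demo_rmw(unsigned int reg, uint32_t mask, uint32_t set)
	{
		/* clear the masked field, then install the new value */
		regs[reg] = (regs[reg] & ~mask) | set;
	}

	int main(void)
	{
		enum { FID_STANDALONE = 0, FID_BRIDGED = 1 };
		enum { STP_FORWARDING = 3 };	/* assumed encoding */

		/* set the bridged FID to forwarding, leave FID 0 untouched */
		demo_rmw(0, FID_PST_MASK(FID_BRIDGED),
			 FID_PST(FID_BRIDGED, STP_FORWARDING));
		printf("SSP = 0x%x\n", (unsigned int)regs[0]);	/* SSP = 0xc */
		return 0;
	}
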
@@ -1329,6 +1342,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, PCR_MATRIX(BIT(MT7530_CPU_PORT))); priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT)); + /* When a port is removed from the bridge, the port would be set up + * back to the default as is at initial boot which is a VLAN-unaware + * port. + */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_MATRIX_MODE); + mutex_unlock(&priv->reg_mutex); } @@ -1511,7 +1531,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv, /* Validate the entry with independent learning, create egress tag per * VLAN and joining the port as one of the port members. */ - val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID; + val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) | + VLAN_VALID; mt7530_write(priv, MT7530_VAWD1, val); /* Decide whether adding tag or not for those outgoing packets from the @@ -1601,9 +1622,13 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add); if (pvid) { - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, - G0_PORT_VID(vlan->vid)); priv->ports[port].pvid = vlan->vid; + + /* Only configure PVID if VLAN filtering is enabled */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PPBV1_P(port), + G0_PORT_VID_MASK, + G0_PORT_VID(vlan->vid)); } mutex_unlock(&priv->reg_mutex); @@ -1617,11 +1642,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, { struct mt7530_hw_vlan_entry target_entry; struct mt7530_priv *priv = ds->priv; - u16 pvid; mutex_lock(&priv->reg_mutex); - pvid = priv->ports[port].pvid; mt7530_hw_vlan_entry_init(&target_entry, port, 0); mt7530_hw_vlan_update(priv, vlan->vid, &target_entry, mt7530_hw_vlan_del); @@ -1629,11 +1652,12 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, /* PVID is being restored to the default whenever the PVID port * is being removed from the VLAN. */ - if (pvid == vlan->vid) - pvid = G0_PORT_VID_DEF; + if (priv->ports[port].pvid == vlan->vid) { + priv->ports[port].pvid = G0_PORT_VID_DEF; + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); + } - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid); - priv->ports[port].pvid = pvid; mutex_unlock(&priv->reg_mutex); @@ -1717,15 +1741,7 @@ static enum dsa_tag_protocol mtk_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) { - struct mt7530_priv *priv = ds->priv; - - if (port != MT7530_CPU_PORT) { - dev_warn(priv->dev, - "port not matched with tagging CPU port\n"); - return DSA_TAG_PROTO_NONE; - } else { - return DSA_TAG_PROTO_MTK; - } + return DSA_TAG_PROTO_MTK; } #ifdef CONFIG_GPIOLIB @@ -2054,6 +2070,7 @@ mt7530_setup(struct dsa_switch *ds) * as two netdev instances. 
*/ dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; if (priv->id == ID_MT7530) { @@ -2124,6 +2141,9 @@ mt7530_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + if (dsa_is_cpu_port(ds, i)) { ret = mt753x_cpu_port_enable(ds, i); if (ret) @@ -2131,8 +2151,9 @@ mt7530_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, @@ -2289,6 +2310,9 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); if (dsa_is_cpu_port(ds, i)) { @@ -2298,8 +2322,9 @@ mt7531_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ @@ -2307,6 +2332,7 @@ mt7531_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; /* Flush the FDB table */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index b19b389ff10a..4a91d80f51bb 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_ENT 3 #define MT7530_ATA2 0x78 #define ATA2_IVL BIT(15) +#define ATA2_FID(x) (((x) & 0x7) << 12) /* Register for address table write data */ #define MT7530_ATWD 0x7c @@ -148,11 +149,18 @@ enum mt7530_vlan_cmd { #define VTAG_EN BIT(28) /* VLAN Member Control */ #define PORT_MEM(x) (((x) & 0xff) << 16) +/* Filter ID */ +#define FID(x) (((x) & 0x7) << 1) /* VLAN Entry Valid */ #define VLAN_VALID BIT(0) #define PORT_MEM_SHFT 16 #define PORT_MEM_MASK 0xff +enum mt7530_fid { + FID_STANDALONE = 0, + FID_BRIDGED = 1, +}; + #define MT7530_VAWD2 0x98 /* Egress Tag Control */ #define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1)) @@ -179,8 +187,8 @@ enum mt7530_vlan_egress_attr { /* Register for port STP state control */ #define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100)) -#define FID_PST(x) ((x) & 0x3) -#define FID_PST_MASK FID_PST(0x3) +#define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2)) +#define FID_PST_MASK(fid) FID_PST(fid, 0x3) enum mt7530_stp_state { MT7530_STP_DISABLED = 0, @@ -247,7 +255,7 @@ enum mt7530_vlan_port_attr { #define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100)) #define G0_PORT_VID(x) (((x) & 0xfff) << 0) #define G0_PORT_VID_MASK G0_PORT_VID(0xfff) -#define G0_PORT_VID_DEF G0_PORT_VID(1) +#define G0_PORT_VID_DEF G0_PORT_VID(0) /* Register for port MAC control register */ #define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100)) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 272b0535d946..ddb51dd132ef 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1221,14 +1221,36 @@ 
static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) bool found = false; u16 pvlan; - list_for_each_entry(dp, &dst->ports, list) { - if (dp->ds->index == dev && dp->index == port) { + /* dev is a physical switch */ + if (dev <= dst->last_switch) { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->index == dev && dp->index == port) { + /* dp might be a DSA link or a user port, so it + * might or might not have a bridge_dev + * pointer. Use the "found" variable for both + * cases. + */ + br = dp->bridge_dev; + found = true; + break; + } + } + /* dev is a virtual bridge */ + } else { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->bridge_num < 0) + continue; + + if (dp->bridge_num + 1 + dst->last_switch != dev) + continue; + + br = dp->bridge_dev; found = true; break; } } - /* Prevent frames from unknown switch or port */ + /* Prevent frames from unknown switch or virtual bridge */ if (!found) return 0; @@ -1236,7 +1258,6 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) if (dp->type == DSA_PORT_TYPE_CPU || dp->type == DSA_PORT_TYPE_DSA) return mv88e6xxx_port_mask(chip); - br = dp->bridge_dev; pvlan = 0; /* Frames from user ports can egress any local DSA links and CPU ports, @@ -2422,6 +2443,44 @@ static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds, mv88e6xxx_reg_unlock(chip); } +/* Treat the software bridge as a virtual single-port switch behind the + * CPU and map in the PVT. First dst->last_switch elements are taken by + * physical switches, so start from beyond that range. + */ +static int mv88e6xxx_map_virtual_bridge_to_pvt(struct dsa_switch *ds, + int bridge_num) +{ + u8 dev = bridge_num + ds->dst->last_switch + 1; + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_pvt_map(chip, dev, 0); + mv88e6xxx_reg_unlock(chip); + + return err; +} + +static int mv88e6xxx_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + return mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num); +} + +static void mv88e6xxx_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + int err; + + err = mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num); + if (err) { + dev_err(ds->dev, "failed to remap cross-chip Port VLAN: %pe\n", + ERR_PTR(err)); + } +} + static int mv88e6xxx_software_reset(struct mv88e6xxx_chip *chip) { if (chip->info->ops->reset) @@ -3025,6 +3084,15 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + /* Since virtual bridges are mapped in the PVT, the number we support + * depends on the physical switch topology. We need to let DSA figure + * that out and therefore we cannot set this at dsa_register_switch() + * time. 
+ */ + if (mv88e6xxx_has_pvt(chip)) + ds->num_fwd_offloading_bridges = MV88E6XXX_MAX_PVT_SWITCHES - + ds->dst->last_switch - 1; + mv88e6xxx_reg_lock(chip); if (chip->info->ops->setup_errata) { @@ -6128,6 +6196,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .crosschip_lag_change = mv88e6xxx_crosschip_lag_change, .crosschip_lag_join = mv88e6xxx_crosschip_lag_join, .crosschip_lag_leave = mv88e6xxx_crosschip_lag_leave, + .port_bridge_tx_fwd_offload = mv88e6xxx_bridge_tx_fwd_offload, + .port_bridge_tx_fwd_unoffload = mv88e6xxx_bridge_tx_fwd_unoffload, }; static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index a2a15919b960..583a22d901b3 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -231,11 +231,6 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) return 0; } -static const struct dsa_8021q_ops felix_tag_8021q_ops = { - .vlan_add = felix_tag_8021q_vlan_add, - .vlan_del = felix_tag_8021q_vlan_del, -}; - /* Alternatively to using the NPI functionality, that same hardware MAC * connected internally to the enetc or fman DSA master can be configured to * use the software-defined tag_8021q frame format. As far as the hardware is @@ -425,29 +420,18 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC); - felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx), - GFP_KERNEL); - if (!felix->dsa_8021q_ctx) - return -ENOMEM; - - felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops; - felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD); - felix->dsa_8021q_ctx->ds = ds; - - err = dsa_8021q_setup(felix->dsa_8021q_ctx, true); + err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD)); if (err) - goto out_free_dsa_8021_ctx; + return err; err = felix_setup_mmio_filtering(felix); if (err) - goto out_teardown_dsa_8021q; + goto out_tag_8021q_unregister; return 0; -out_teardown_dsa_8021q: - dsa_8021q_setup(felix->dsa_8021q_ctx, false); -out_free_dsa_8021_ctx: - kfree(felix->dsa_8021q_ctx); +out_tag_8021q_unregister: + dsa_tag_8021q_unregister(ds); return err; } @@ -462,11 +446,7 @@ static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu) dev_err(ds->dev, "felix_teardown_mmio_filtering returned %d", err); - err = dsa_8021q_setup(felix->dsa_8021q_ctx, false); - if (err) - dev_err(ds->dev, "dsa_8021q_setup returned %d", err); - - kfree(felix->dsa_8021q_ctx); + dsa_tag_8021q_unregister(ds); for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) @@ -1679,6 +1659,8 @@ const struct dsa_switch_ops felix_switch_ops = { .port_mrp_del = felix_mrp_del, .port_mrp_add_ring_role = felix_mrp_add_ring_role, .port_mrp_del_ring_role = felix_mrp_del_ring_role, + .tag_8021q_vlan_add = felix_tag_8021q_vlan_add, + .tag_8021q_vlan_del = felix_tag_8021q_vlan_del, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 4d96cad815d5..9da3c6a94c6e 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -60,7 +60,6 @@ struct felix { struct lynx_pcs **pcs; resource_size_t switch_base; resource_size_t imdio_base; - struct dsa_8021q_context *dsa_8021q_ctx; enum dsa_tag_protocol tag_proto; }; diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h 
index 221c7abdef0e..9cd7dbdd7db9 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -226,27 +226,13 @@ struct sja1105_flow_block { int num_virtual_links; }; -struct sja1105_bridge_vlan { - struct list_head list; - int port; - u16 vid; - bool pvid; - bool untagged; -}; - -enum sja1105_vlan_state { - SJA1105_VLAN_UNAWARE, - SJA1105_VLAN_BEST_EFFORT, - SJA1105_VLAN_FILTERING_FULL, -}; - struct sja1105_private { struct sja1105_static_config static_config; bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS]; bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS]; phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; - bool best_effort_vlan_filtering; + bool vlan_aware; unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; @@ -255,16 +241,14 @@ struct sja1105_private { struct gpio_desc *reset_gpio; struct spi_device *spidev; struct dsa_switch *ds; - struct list_head dsa_8021q_vlans; - struct list_head bridge_vlans; + u16 bridge_pvid[SJA1105_MAX_NUM_PORTS]; + u16 tag_8021q_pvid[SJA1105_MAX_NUM_PORTS]; struct sja1105_flow_block flow_block; struct sja1105_port ports[SJA1105_MAX_NUM_PORTS]; /* Serializes transmission of management frames so that * the switch doesn't confuse them with one another. */ struct mutex mgmt_lock; - struct dsa_8021q_context *dsa_8021q_ctx; - enum sja1105_vlan_state vlan_state; struct devlink_region **regions; struct sja1105_cbs_entry *cbs; struct mii_bus *mdio_base_t1; @@ -311,10 +295,6 @@ int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val); /* From sja1105_devlink.c */ int sja1105_devlink_setup(struct dsa_switch *ds); void sja1105_devlink_teardown(struct dsa_switch *ds); -int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx); -int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx); int sja1105_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index b6a4a16b8c7e..05c7f4ca3b1a 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -115,105 +115,6 @@ static void sja1105_teardown_devlink_regions(struct dsa_switch *ds) kfree(priv->regions); } -static int sja1105_best_effort_vlan_filtering_get(struct sja1105_private *priv, - bool *be_vlan) -{ - *be_vlan = priv->best_effort_vlan_filtering; - - return 0; -} - -static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv, - bool be_vlan) -{ - struct dsa_switch *ds = priv->ds; - bool vlan_filtering; - int port; - int rc; - - priv->best_effort_vlan_filtering = be_vlan; - - rtnl_lock(); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp; - - if (!dsa_is_user_port(ds, port)) - continue; - - dp = dsa_to_port(ds, port); - vlan_filtering = dsa_port_is_vlan_filtering(dp); - - rc = sja1105_vlan_filtering(ds, port, vlan_filtering, NULL); - if (rc) - break; - } - rtnl_unlock(); - - return rc; -} - -enum sja1105_devlink_param_id { - SJA1105_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, - SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING, -}; - -int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct sja1105_private *priv = ds->priv; - int err; - - switch (id) { - case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING: - err = 
sja1105_best_effort_vlan_filtering_get(priv, - &ctx->val.vbool); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct sja1105_private *priv = ds->priv; - int err; - - switch (id) { - case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING: - err = sja1105_best_effort_vlan_filtering_set(priv, - ctx->val.vbool); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static const struct devlink_param sja1105_devlink_params[] = { - DSA_DEVLINK_PARAM_DRIVER(SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING, - "best_effort_vlan_filtering", - DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME)), -}; - -static int sja1105_setup_devlink_params(struct dsa_switch *ds) -{ - return dsa_devlink_params_register(ds, sja1105_devlink_params, - ARRAY_SIZE(sja1105_devlink_params)); -} - -static void sja1105_teardown_devlink_params(struct dsa_switch *ds) -{ - dsa_devlink_params_unregister(ds, sja1105_devlink_params, - ARRAY_SIZE(sja1105_devlink_params)); -} - int sja1105_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -233,23 +134,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds, int sja1105_devlink_setup(struct dsa_switch *ds) { - int rc; - - rc = sja1105_setup_devlink_params(ds); - if (rc) - return rc; - - rc = sja1105_setup_devlink_regions(ds); - if (rc < 0) { - sja1105_teardown_devlink_params(ds); - return rc; - } - - return 0; + return sja1105_setup_devlink_regions(ds); } void sja1105_devlink_teardown(struct dsa_switch *ds) { - sja1105_teardown_devlink_params(ds); sja1105_teardown_devlink_regions(ds); } diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 147709131c13..f2049f52833c 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -1355,14 +1355,14 @@ u8 sja1105et_fdb_hash(struct sja1105_private *priv, const u8 *addr, u16 vid) { struct sja1105_l2_lookup_params_entry *l2_lookup_params = priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS].entries; - u64 poly_koopman = l2_lookup_params->poly; + u64 input, poly_koopman = l2_lookup_params->poly; /* Convert polynomial from Koopman to 'normal' notation */ u8 poly = (u8)(1 + (poly_koopman << 1)); - u64 vlanid = l2_lookup_params->shared_learn ? 
0 : vid; - u64 input = (vlanid << 48) | ether_addr_to_u64(addr); u8 crc = 0; /* seed */ int i; + input = ((u64)vid << 48) | ether_addr_to_u64(addr); + /* Mask the eight bytes starting from MSB one at a time */ for (i = 56; i >= 0; i -= 8) { u8 byte = (input & (0xffull << i)) >> i; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 8667c9754330..241fd25b0b86 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -57,6 +57,81 @@ static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd, return !!(l2_fwd[from].reach_port & BIT(to)); } +static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid) +{ + struct sja1105_vlan_lookup_entry *vlan; + int count, i; + + vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries; + count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count; + + for (i = 0; i < count; i++) + if (vlan[i].vlanid == vid) + return i; + + /* Return an invalid entry index if not found */ + return -1; +} + +static int sja1105_drop_untagged(struct dsa_switch *ds, int port, bool drop) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_mac_config_entry *mac; + + mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; + + if (mac[port].drpuntag == drop) + return 0; + + mac[port].drpuntag = drop; + + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); +} + +static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid) +{ + struct sja1105_mac_config_entry *mac; + + mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; + + if (mac[port].vlanid == pvid) + return 0; + + mac[port].vlanid = pvid; + + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); +} + +static int sja1105_commit_pvid(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct sja1105_private *priv = ds->priv; + struct sja1105_vlan_lookup_entry *vlan; + bool drop_untagged = false; + int match, rc; + u16 pvid; + + if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) + pvid = priv->bridge_pvid[port]; + else + pvid = priv->tag_8021q_pvid[port]; + + rc = sja1105_pvid_apply(priv, port, pvid); + if (rc) + return rc; + + vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries; + + match = sja1105_is_vlan_configured(priv, pvid); + + if (match < 0 || !(vlan[match].vmemb_port & BIT(port))) + drop_untagged = true; + + return sja1105_drop_untagged(ds, port, drop_untagged); +} + static int sja1105_init_mac_settings(struct sja1105_private *priv) { struct sja1105_mac_config_entry default_mac = { @@ -124,9 +199,13 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) mac[i] = default_mac; /* Let sja1105_bridge_stp_state_set() keep address learning - * enabled for the CPU port. + * enabled for the DSA ports. CPU ports use software-assisted + * learning to ensure that only FDB entries belonging to the + * bridge are learned, and that they are learned towards all + * CPU ports in a cross-chip topology if multiple CPU ports + * exist. 
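+		 * (This is the generic DSA assisted learning on the CPU port;
+		 * the ds->assisted_learning_on_cpu_port flag that enables it
+		 * is set in sja1105_setup() further down.)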
*/ - if (dsa_is_cpu_port(ds, i)) + if (dsa_is_dsa_port(ds, i)) priv->learn_ena |= BIT(i); } @@ -378,8 +457,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) table->entry_count = 1; for (port = 0; port < ds->num_ports; port++) { - struct sja1105_bridge_vlan *v; - if (dsa_is_unused_port(ds, port)) continue; @@ -387,22 +464,10 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) pvid.vlan_bc |= BIT(port); pvid.tag_port &= ~BIT(port); - v = kzalloc(sizeof(*v), GFP_KERNEL); - if (!v) - return -ENOMEM; - - v->port = port; - v->vid = SJA1105_DEFAULT_VLAN; - v->untagged = true; - if (dsa_is_cpu_port(ds, port)) - v->pvid = true; - list_add(&v->list, &priv->dsa_8021q_vlans); - - v = kmemdup(v, sizeof(*v), GFP_KERNEL); - if (!v) - return -ENOMEM; - - list_add(&v->list, &priv->bridge_vlans); + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { + priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN; + priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN; + } } ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid; @@ -413,8 +478,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) { struct sja1105_l2_forwarding_entry *l2fwd; struct dsa_switch *ds = priv->ds; + struct dsa_switch_tree *dst; struct sja1105_table *table; - int i, j; + struct dsa_link *dl; + int port, tc; + int from, to; table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING]; @@ -432,47 +500,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) l2fwd = table->entries; - /* First 5 entries define the forwarding rules */ - for (i = 0; i < ds->num_ports; i++) { - unsigned int upstream = dsa_upstream_port(priv->ds, i); + /* First 5 entries in the L2 Forwarding Table define the forwarding + * rules and the VLAN PCP to ingress queue mapping. + * Set up the ingress queue mapping first. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + for (tc = 0; tc < SJA1105_NUM_TC; tc++) + l2fwd[port].vlan_pmap[tc] = tc; + } - if (dsa_is_unused_port(ds, i)) + /* Then manage the forwarding domain for user ports. These can forward + * only to the always-on domain (CPU port and DSA links) + */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_user_port(ds, from)) continue; - for (j = 0; j < SJA1105_NUM_TC; j++) - l2fwd[i].vlan_pmap[j] = j; + for (to = 0; to < ds->num_ports; to++) { + if (!dsa_is_cpu_port(ds, to) && + !dsa_is_dsa_port(ds, to)) + continue; - /* All ports start up with egress flooding enabled, - * including the CPU port. - */ - priv->ucast_egress_floods |= BIT(i); - priv->bcast_egress_floods |= BIT(i); + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); - if (i == upstream) + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* Then manage the forwarding domain for DSA links and CPU ports (the + * always-on domain). These can send packets to any enabled port except + * themselves. 
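+	 * For example, with a dedicated CPU port at index 4 and a DSA link at
+	 * index 3, l2fwd[4].bc_domain and l2fwd[4].fl_domain both end up as
+	 * BIT(0) | BIT(1) | BIT(2) | BIT(3), i.e. every enabled port except
+	 * port 4 itself.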
+ */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from)) continue; - sja1105_port_allow_traffic(l2fwd, i, upstream, true); - sja1105_port_allow_traffic(l2fwd, upstream, i, true); + for (to = 0; to < ds->num_ports; to++) { + if (dsa_is_unused_port(ds, to)) + continue; + + if (from == to) + continue; - l2fwd[i].bc_domain = BIT(upstream); - l2fwd[i].fl_domain = BIT(upstream); + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); - l2fwd[upstream].bc_domain |= BIT(i); - l2fwd[upstream].fl_domain |= BIT(i); + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* In odd topologies ("H" connections where there is a DSA link to + * another switch which also has its own CPU port), TX packets can loop + * back into the system (they are flooded from CPU port 1 to the DSA + * link, and from there to CPU port 2). Prevent this from happening by + * cutting RX from DSA links towards our CPU port, if the remote switch + * has its own CPU port and therefore doesn't need ours for network + * stack termination. + */ + dst = ds->dst; + + list_for_each_entry(dl, &dst->rtable, list) { + if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp) + continue; + + from = dl->dp->index; + to = dsa_upstream_port(ds, from); + + dev_warn(ds->dev, + "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n", + from, to); + + sja1105_port_allow_traffic(l2fwd, from, to, false); + + l2fwd[from].bc_domain &= ~BIT(to); + l2fwd[from].fl_domain &= ~BIT(to); + } + + /* Finally, manage the egress flooding domain. All ports start up with + * flooding enabled, including the CPU port and DSA links. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + priv->ucast_egress_floods |= BIT(port); + priv->bcast_egress_floods |= BIT(port); } /* Next 8 entries define VLAN PCP mapping from ingress to egress. * Create a one-to-one mapping. */ - for (i = 0; i < SJA1105_NUM_TC; i++) { - for (j = 0; j < ds->num_ports; j++) { - if (dsa_is_unused_port(ds, j)) + for (tc = 0; tc < SJA1105_NUM_TC; tc++) { + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) continue; - l2fwd[ds->num_ports + i].vlan_pmap[j] = i; + l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc; } - l2fwd[ds->num_ports + i].type_egrpcp2outputq = true; + l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true; } return 0; @@ -551,18 +681,11 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv) { struct sja1105_l2_forwarding_params_entry *l2_fwd_params; struct sja1105_vl_forwarding_params_entry *vl_fwd_params; - int max_mem = priv->info->max_frame_mem; struct sja1105_table *table; - /* VLAN retagging is implemented using a loopback port that consumes - * frame buffers. That leaves less for us. - */ - if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT) - max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD; - table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS]; l2_fwd_params = table->entries; - l2_fwd_params->part_spc[0] = max_mem; + l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY; /* If we have any critical-traffic virtual links, we need to reserve * some frame buffer memory for them. 
At the moment, hardcode the value
@@ -634,6 +757,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
 	general_params->tdmaconfigidx = tdmaconfigidx;
 }
 
+static int sja1105_init_topology(struct sja1105_private *priv,
+				 struct sja1105_general_params_entry *general_params)
+{
+	struct dsa_switch *ds = priv->ds;
+	int port;
+
+	/* The host port is the destination for traffic matching mac_fltres1
+	 * and mac_fltres0 on all ports except itself. Default to an invalid
+	 * value.
+	 */
+	general_params->host_port = ds->num_ports;
+
+	/* Link-local traffic received on casc_port will be forwarded
+	 * to host_port without embedding the source port and device ID
+	 * info in the destination MAC address, and no RX timestamps will be
+	 * taken either (presumably because it is a cascaded port and a
+	 * downstream SJA switch already did that).
+	 * To disable the feature, we need to do different things depending on
+	 * switch generation. On SJA1105 we need to set an invalid port, while
+	 * on SJA1110, which supports multiple cascaded ports, this field is a
+	 * bitmask so it must be left zero.
+	 */
+	if (!priv->info->multiple_cascade_ports)
+		general_params->casc_port = ds->num_ports;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		bool is_upstream = dsa_is_upstream_port(ds, port);
+		bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+		/* Upstream ports can be dedicated CPU ports or
+		 * upstream-facing DSA links
+		 */
+		if (is_upstream) {
+			if (general_params->host_port == ds->num_ports) {
+				general_params->host_port = port;
+			} else {
+				dev_err(ds->dev,
+					"Port %llu is already a host port, configuring %d as one too is not supported\n",
+					general_params->host_port, port);
+				return -EINVAL;
+			}
+		}
+
+		/* Cascade ports are downstream-facing DSA links */
+		if (is_dsa_link && !is_upstream) {
+			if (priv->info->multiple_cascade_ports) {
+				general_params->casc_port |= BIT(port);
+			} else if (general_params->casc_port == ds->num_ports) {
+				general_params->casc_port = port;
+			} else {
+				dev_err(ds->dev,
+					"Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+					general_params->casc_port, port);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (general_params->host_port == ds->num_ports) {
+		dev_err(ds->dev, "No host port configured\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
 	struct sja1105_general_params_entry default_general_params = {
@@ -652,12 +841,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		.mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK,
 		.incl_srcpt0 = false,
 		.send_meta0 = false,
-		/* The destination for traffic matching mac_fltres1 and
-		 * mac_fltres0 on all ports except host_port.
Such traffic - * receieved on host_port itself would be dropped, except - * by installing a temporary 'management route' - */ - .host_port = priv->ds->num_ports, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, /* No TTEthernet */ @@ -677,16 +860,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .header_type = ETH_P_SJA1110, }; struct sja1105_general_params_entry *general_params; - struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int port; + int rc; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_is_cpu_port(ds, port)) { - default_general_params.host_port = port; - break; - } - } + rc = sja1105_init_topology(priv, &default_general_params); + if (rc) + return rc; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; @@ -709,19 +888,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv) sja1110_select_tdmaconfigidx(priv); - /* Link-local traffic received on casc_port will be forwarded - * to host_port without embedding the source port and device ID - * info in the destination MAC address, and no RX timestamps will be - * taken either (presumably because it is a cascaded port and a - * downstream SJA switch already did that). - * To disable the feature, we need to do different things depending on - * switch generation. On SJA1105 we need to set an invalid port, while - * on SJA1110 which support multiple cascaded ports, this field is a - * bitmask so it must be left zero. - */ - if (!priv->info->multiple_cascade_ports) - general_params->casc_port = ds->num_ports; - return 0; } @@ -849,7 +1015,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) for (port = 0; port < ds->num_ports; port++) { int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(priv->ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) mtu += VLAN_HLEN; policing[port].smax = 65535; /* Burst size in bytes */ @@ -1568,18 +1734,6 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; - /* dsa_8021q is in effect when the bridge's vlan_filtering isn't, - * so the switch still does some VLAN processing internally. - * But Shared VLAN Learning (SVL) is also active, and it will take - * care of autonomous forwarding between the unique pvid's of each - * port. Here we just make sure that users can't add duplicate FDB - * entries when in this mode - the actual VID doesn't matter except - * for what gets printed in 'bridge fdb show'. In the case of zero, - * no VID gets printed at all. - */ - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL) - vid = 0; - return priv->info->fdb_add_cmd(ds, port, addr, vid); } @@ -1588,9 +1742,6 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL) - vid = 0; - return priv->info->fdb_del_cmd(ds, port, addr, vid); } @@ -1633,7 +1784,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, u64_to_ether_addr(l2_lookup.macaddr, macaddr); /* We need to hide the dsa_8021q VLANs from the user. 
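 		 * (These are VIDs in the 1024-3071 range reserved for
 		 * tag_8021q operation; see the range check in
 		 * sja1105_bridge_vlan_add() below.)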
*/ - if (priv->vlan_state == SJA1105_VLAN_UNAWARE) + if (!priv->vlan_aware) l2_lookup.vlanid = 0; cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); } @@ -1738,6 +1889,10 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port, if (rc) return rc; + rc = sja1105_commit_pvid(ds, port); + if (rc) + return rc; + return sja1105_manage_flood_domains(priv); } @@ -2037,97 +2192,6 @@ out: return rc; } -static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid) -{ - struct sja1105_mac_config_entry *mac; - - mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; - - mac[port].vlanid = pvid; - - return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, - &mac[port], true); -} - -static int sja1105_crosschip_bridge_join(struct dsa_switch *ds, - int tree_index, int sw_index, - int other_port, struct net_device *br) -{ - struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; - int port, rc; - - if (other_ds->ops != &sja1105_switch_ops) - return 0; - - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - if (dsa_to_port(ds, port)->bridge_dev != br) - continue; - - rc = dsa_8021q_crosschip_bridge_join(priv->dsa_8021q_ctx, - port, - other_priv->dsa_8021q_ctx, - other_port); - if (rc) - return rc; - - rc = dsa_8021q_crosschip_bridge_join(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, - port); - if (rc) - return rc; - } - - return 0; -} - -static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds, - int tree_index, int sw_index, - int other_port, - struct net_device *br) -{ - struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; - int port; - - if (other_ds->ops != &sja1105_switch_ops) - return; - - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - if (dsa_to_port(ds, port)->bridge_dev != br) - continue; - - dsa_8021q_crosschip_bridge_leave(priv->dsa_8021q_ctx, port, - other_priv->dsa_8021q_ctx, - other_port); - - dsa_8021q_crosschip_bridge_leave(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, port); - } -} - -static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled) -{ - struct sja1105_private *priv = ds->priv; - int rc; - - rc = dsa_8021q_setup(priv->dsa_8021q_ctx, enabled); - if (rc) - return rc; - - dev_info(ds->dev, "%s switch tagging\n", - enabled ? 
"Enabled" : "Disabled"); - return 0; -} - static enum dsa_tag_protocol sja1105_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -2137,669 +2201,6 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port, return priv->info->tag_proto; } -static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid) -{ - int subvlan; - - if (pvid) - return 0; - - for (subvlan = 1; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - if (subvlan_map[subvlan] == VLAN_N_VID) - return subvlan; - - return -1; -} - -static int sja1105_find_subvlan(u16 *subvlan_map, u16 vid) -{ - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - if (subvlan_map[subvlan] == vid) - return subvlan; - - return -1; -} - -static int sja1105_find_committed_subvlan(struct sja1105_private *priv, - int port, u16 vid) -{ - struct sja1105_port *sp = &priv->ports[port]; - - return sja1105_find_subvlan(sp->subvlan_map, vid); -} - -static void sja1105_init_subvlan_map(u16 *subvlan_map) -{ - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - subvlan_map[subvlan] = VLAN_N_VID; -} - -static void sja1105_commit_subvlan_map(struct sja1105_private *priv, int port, - u16 *subvlan_map) -{ - struct sja1105_port *sp = &priv->ports[port]; - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - sp->subvlan_map[subvlan] = subvlan_map[subvlan]; -} - -static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid) -{ - struct sja1105_vlan_lookup_entry *vlan; - int count, i; - - vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries; - count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count; - - for (i = 0; i < count; i++) - if (vlan[i].vlanid == vid) - return i; - - /* Return an invalid entry index if not found */ - return -1; -} - -static int -sja1105_find_retagging_entry(struct sja1105_retagging_entry *retagging, - int count, int from_port, u16 from_vid, - u16 to_vid) -{ - int i; - - for (i = 0; i < count; i++) - if (retagging[i].ing_port == BIT(from_port) && - retagging[i].vlan_ing == from_vid && - retagging[i].vlan_egr == to_vid) - return i; - - /* Return an invalid entry index if not found */ - return -1; -} - -static int sja1105_commit_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int num_retagging) -{ - struct sja1105_retagging_entry *retagging; - struct sja1105_vlan_lookup_entry *vlan; - struct sja1105_table *table; - int num_vlans = 0; - int rc, i, k = 0; - - /* VLAN table */ - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - vlan = table->entries; - - for (i = 0; i < VLAN_N_VID; i++) { - int match = sja1105_is_vlan_configured(priv, i); - - if (new_vlan[i].vlanid != VLAN_N_VID) - num_vlans++; - - if (new_vlan[i].vlanid == VLAN_N_VID && match >= 0) { - /* Was there before, no longer is. 
Delete */ - dev_dbg(priv->ds->dev, "Deleting VLAN %d\n", i); - rc = sja1105_dynamic_config_write(priv, - BLK_IDX_VLAN_LOOKUP, - i, &vlan[match], false); - if (rc < 0) - return rc; - } else if (new_vlan[i].vlanid != VLAN_N_VID) { - /* Nothing changed, don't do anything */ - if (match >= 0 && - vlan[match].vlanid == new_vlan[i].vlanid && - vlan[match].tag_port == new_vlan[i].tag_port && - vlan[match].vlan_bc == new_vlan[i].vlan_bc && - vlan[match].vmemb_port == new_vlan[i].vmemb_port) - continue; - /* Update entry */ - dev_dbg(priv->ds->dev, "Updating VLAN %d\n", i); - rc = sja1105_dynamic_config_write(priv, - BLK_IDX_VLAN_LOOKUP, - i, &new_vlan[i], - true); - if (rc < 0) - return rc; - } - } - - if (table->entry_count) - kfree(table->entries); - - table->entries = kcalloc(num_vlans, table->ops->unpacked_entry_size, - GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = num_vlans; - vlan = table->entries; - - for (i = 0; i < VLAN_N_VID; i++) { - if (new_vlan[i].vlanid == VLAN_N_VID) - continue; - vlan[k++] = new_vlan[i]; - } - - /* VLAN Retagging Table */ - table = &priv->static_config.tables[BLK_IDX_RETAGGING]; - retagging = table->entries; - - for (i = 0; i < table->entry_count; i++) { - rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING, - i, &retagging[i], false); - if (rc) - return rc; - } - - if (table->entry_count) - kfree(table->entries); - - table->entries = kcalloc(num_retagging, table->ops->unpacked_entry_size, - GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = num_retagging; - retagging = table->entries; - - for (i = 0; i < num_retagging; i++) { - retagging[i] = new_retagging[i]; - - /* Update entry */ - rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING, - i, &retagging[i], true); - if (rc < 0) - return rc; - } - - return 0; -} - -struct sja1105_crosschip_vlan { - struct list_head list; - u16 vid; - bool untagged; - int port; - int other_port; - struct dsa_8021q_context *other_ctx; -}; - -struct sja1105_crosschip_switch { - struct list_head list; - struct dsa_8021q_context *other_ctx; -}; - -static int sja1105_commit_pvid(struct sja1105_private *priv) -{ - struct sja1105_bridge_vlan *v; - struct list_head *vlan_list; - int rc = 0; - - if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - vlan_list = &priv->bridge_vlans; - else - vlan_list = &priv->dsa_8021q_vlans; - - list_for_each_entry(v, vlan_list, list) { - if (v->pvid) { - rc = sja1105_pvid_apply(priv, v->port, v->vid); - if (rc) - break; - } - } - - return rc; -} - -static int -sja1105_build_bridge_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan) -{ - struct sja1105_bridge_vlan *v; - - if (priv->vlan_state == SJA1105_VLAN_UNAWARE) - return 0; - - list_for_each_entry(v, &priv->bridge_vlans, list) { - int match = v->vid; - - new_vlan[match].vlanid = v->vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(v->port); - if (!v->untagged) - new_vlan[match].tag_port |= BIT(v->port); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - } - - return 0; -} - -static int -sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan) -{ - struct sja1105_bridge_vlan *v; - - if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - return 0; - - list_for_each_entry(v, &priv->dsa_8021q_vlans, list) { - int match = v->vid; - - new_vlan[match].vlanid = v->vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(v->port); - if (!v->untagged) 
- new_vlan[match].tag_port |= BIT(v->port); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - } - - return 0; -} - -static int sja1105_build_subvlans(struct sja1105_private *priv, - u16 subvlan_map[][DSA_8021Q_N_SUBVLAN], - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int *num_retagging) -{ - struct sja1105_bridge_vlan *v; - int k = *num_retagging; - - if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT) - return 0; - - list_for_each_entry(v, &priv->bridge_vlans, list) { - int upstream = dsa_upstream_port(priv->ds, v->port); - int match, subvlan; - u16 rx_vid; - - /* Only sub-VLANs on user ports need to be applied. - * Bridge VLANs also include VLANs added automatically - * by DSA on the CPU port. - */ - if (!dsa_is_user_port(priv->ds, v->port)) - continue; - - subvlan = sja1105_find_subvlan(subvlan_map[v->port], - v->vid); - if (subvlan < 0) { - subvlan = sja1105_find_free_subvlan(subvlan_map[v->port], - v->pvid); - if (subvlan < 0) { - dev_err(priv->ds->dev, "No more free subvlans\n"); - return -ENOSPC; - } - } - - rx_vid = dsa_8021q_rx_vid_subvlan(priv->ds, v->port, subvlan); - - /* @v->vid on @v->port needs to be retagged to @rx_vid - * on @upstream. Assume @v->vid on @v->port and on - * @upstream was already configured by the previous - * iteration over bridge_vlans. - */ - match = rx_vid; - new_vlan[match].vlanid = rx_vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vmemb_port |= BIT(upstream); - new_vlan[match].vlan_bc |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(upstream); - /* The "untagged" flag is set the same as for the - * original VLAN - */ - if (!v->untagged) - new_vlan[match].tag_port |= BIT(v->port); - /* But it's always tagged towards the CPU */ - new_vlan[match].tag_port |= BIT(upstream); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - - /* The Retagging Table generates packet *clones* with - * the new VLAN. This is a very odd hardware quirk - * which we need to suppress by dropping the original - * packet. - * Deny egress of the original VLAN towards the CPU - * port. This will force the switch to drop it, and - * we'll see only the retagged packets. - */ - match = v->vid; - new_vlan[match].vlan_bc &= ~BIT(upstream); - - /* And the retagging itself */ - new_retagging[k].vlan_ing = v->vid; - new_retagging[k].vlan_egr = rx_vid; - new_retagging[k].ing_port = BIT(v->port); - new_retagging[k].egr_port = BIT(upstream); - if (k++ == SJA1105_MAX_RETAGGING_COUNT) { - dev_err(priv->ds->dev, "No more retagging rules\n"); - return -ENOSPC; - } - - subvlan_map[v->port][subvlan] = v->vid; - } - - *num_retagging = k; - - return 0; -} - -/* Sadly, in crosschip scenarios where the CPU port is also the link to another - * switch, we should retag backwards (the dsa_8021q vid to the original vid) on - * the CPU port of neighbour switches. 
- */ -static int -sja1105_build_crosschip_subvlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int *num_retagging) -{ - struct sja1105_crosschip_vlan *tmp, *pos; - struct dsa_8021q_crosschip_link *c; - struct sja1105_bridge_vlan *v, *w; - struct list_head crosschip_vlans; - int k = *num_retagging; - int rc = 0; - - if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT) - return 0; - - INIT_LIST_HEAD(&crosschip_vlans); - - list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) { - struct sja1105_private *other_priv = c->other_ctx->ds->priv; - - if (other_priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - continue; - - /* Crosschip links are also added to the CPU ports. - * Ignore those. - */ - if (!dsa_is_user_port(priv->ds, c->port)) - continue; - if (!dsa_is_user_port(c->other_ctx->ds, c->other_port)) - continue; - - /* Search for VLANs on the remote port */ - list_for_each_entry(v, &other_priv->bridge_vlans, list) { - bool already_added = false; - bool we_have_it = false; - - if (v->port != c->other_port) - continue; - - /* If @v is a pvid on @other_ds, it does not need - * re-retagging, because its SVL field is 0 and we - * already allow that, via the dsa_8021q crosschip - * links. - */ - if (v->pvid) - continue; - - /* Search for the VLAN on our local port */ - list_for_each_entry(w, &priv->bridge_vlans, list) { - if (w->port == c->port && w->vid == v->vid) { - we_have_it = true; - break; - } - } - - if (!we_have_it) - continue; - - list_for_each_entry(tmp, &crosschip_vlans, list) { - if (tmp->vid == v->vid && - tmp->untagged == v->untagged && - tmp->port == c->port && - tmp->other_port == v->port && - tmp->other_ctx == c->other_ctx) { - already_added = true; - break; - } - } - - if (already_added) - continue; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) { - dev_err(priv->ds->dev, "Failed to allocate memory\n"); - rc = -ENOMEM; - goto out; - } - tmp->vid = v->vid; - tmp->port = c->port; - tmp->other_port = v->port; - tmp->other_ctx = c->other_ctx; - tmp->untagged = v->untagged; - list_add(&tmp->list, &crosschip_vlans); - } - } - - list_for_each_entry(tmp, &crosschip_vlans, list) { - struct sja1105_private *other_priv = tmp->other_ctx->ds->priv; - int upstream = dsa_upstream_port(priv->ds, tmp->port); - int match, subvlan; - u16 rx_vid; - - subvlan = sja1105_find_committed_subvlan(other_priv, - tmp->other_port, - tmp->vid); - /* If this happens, it's a bug. The neighbour switch does not - * have a subvlan for tmp->vid on tmp->other_port, but it - * should, since we already checked for its vlan_state. - */ - if (WARN_ON(subvlan < 0)) { - rc = -EINVAL; - goto out; - } - - rx_vid = dsa_8021q_rx_vid_subvlan(tmp->other_ctx->ds, - tmp->other_port, - subvlan); - - /* The @rx_vid retagged from @tmp->vid on - * {@tmp->other_ds, @tmp->other_port} needs to be - * re-retagged to @tmp->vid on the way back to us. - * - * Assume the original @tmp->vid is already configured - * on this local switch, otherwise we wouldn't be - * retagging its subvlan on the other switch in the - * first place. We just need to add a reverse retagging - * rule for @rx_vid and install @rx_vid on our ports. - */ - match = rx_vid; - new_vlan[match].vlanid = rx_vid; - new_vlan[match].vmemb_port |= BIT(tmp->port); - new_vlan[match].vmemb_port |= BIT(upstream); - /* The "untagged" flag is set the same as for the - * original VLAN. 
And towards the CPU, it doesn't - * really matter, because @rx_vid will only receive - * traffic on that port. For consistency with other dsa_8021q - * VLANs, we'll keep the CPU port tagged. - */ - if (!tmp->untagged) - new_vlan[match].tag_port |= BIT(tmp->port); - new_vlan[match].tag_port |= BIT(upstream); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - /* Deny egress of @rx_vid towards our front-panel port. - * This will force the switch to drop it, and we'll see - * only the re-retagged packets (having the original, - * pre-initial-retagging, VLAN @tmp->vid). - */ - new_vlan[match].vlan_bc &= ~BIT(tmp->port); - - /* On reverse retagging, the same ingress VLAN goes to multiple - * ports. So we have an opportunity to create composite rules - * to not waste the limited space in the retagging table. - */ - k = sja1105_find_retagging_entry(new_retagging, *num_retagging, - upstream, rx_vid, tmp->vid); - if (k < 0) { - if (*num_retagging == SJA1105_MAX_RETAGGING_COUNT) { - dev_err(priv->ds->dev, "No more retagging rules\n"); - rc = -ENOSPC; - goto out; - } - k = (*num_retagging)++; - } - /* And the retagging itself */ - new_retagging[k].vlan_ing = rx_vid; - new_retagging[k].vlan_egr = tmp->vid; - new_retagging[k].ing_port = BIT(upstream); - new_retagging[k].egr_port |= BIT(tmp->port); - } - -out: - list_for_each_entry_safe(tmp, pos, &crosschip_vlans, list) { - list_del(&tmp->list); - kfree(tmp); - } - - return rc; -} - -static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify); - -static int sja1105_notify_crosschip_switches(struct sja1105_private *priv) -{ - struct sja1105_crosschip_switch *s, *pos; - struct list_head crosschip_switches; - struct dsa_8021q_crosschip_link *c; - int rc = 0; - - INIT_LIST_HEAD(&crosschip_switches); - - list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) { - bool already_added = false; - - list_for_each_entry(s, &crosschip_switches, list) { - if (s->other_ctx == c->other_ctx) { - already_added = true; - break; - } - } - - if (already_added) - continue; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) { - dev_err(priv->ds->dev, "Failed to allocate memory\n"); - rc = -ENOMEM; - goto out; - } - s->other_ctx = c->other_ctx; - list_add(&s->list, &crosschip_switches); - } - - list_for_each_entry(s, &crosschip_switches, list) { - struct sja1105_private *other_priv = s->other_ctx->ds->priv; - - rc = sja1105_build_vlan_table(other_priv, false); - if (rc) - goto out; - } - -out: - list_for_each_entry_safe(s, pos, &crosschip_switches, list) { - list_del(&s->list); - kfree(s); - } - - return rc; -} - -static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify) -{ - u16 subvlan_map[SJA1105_MAX_NUM_PORTS][DSA_8021Q_N_SUBVLAN]; - struct sja1105_retagging_entry *new_retagging; - struct sja1105_vlan_lookup_entry *new_vlan; - struct sja1105_table *table; - int i, num_retagging = 0; - int rc; - - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - new_vlan = kcalloc(VLAN_N_VID, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!new_vlan) - return -ENOMEM; - - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - new_retagging = kcalloc(SJA1105_MAX_RETAGGING_COUNT, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!new_retagging) { - kfree(new_vlan); - return -ENOMEM; - } - - for (i = 0; i < VLAN_N_VID; i++) - new_vlan[i].vlanid = VLAN_N_VID; - - for (i = 0; i < SJA1105_MAX_RETAGGING_COUNT; i++) - new_retagging[i].vlan_ing = VLAN_N_VID; - - for (i = 0; i < priv->ds->num_ports; i++) - 
sja1105_init_subvlan_map(subvlan_map[i]); - - /* Bridge VLANs */ - rc = sja1105_build_bridge_vlans(priv, new_vlan); - if (rc) - goto out; - - /* VLANs necessary for dsa_8021q operation, given to us by tag_8021q.c: - * - RX VLANs - * - TX VLANs - * - Crosschip links - */ - rc = sja1105_build_dsa_8021q_vlans(priv, new_vlan); - if (rc) - goto out; - - /* Private VLANs necessary for dsa_8021q operation, which we need to - * determine on our own: - * - Sub-VLANs - * - Sub-VLANs of crosschip switches - */ - rc = sja1105_build_subvlans(priv, subvlan_map, new_vlan, new_retagging, - &num_retagging); - if (rc) - goto out; - - rc = sja1105_build_crosschip_subvlans(priv, new_vlan, new_retagging, - &num_retagging); - if (rc) - goto out; - - rc = sja1105_commit_vlans(priv, new_vlan, new_retagging, num_retagging); - if (rc) - goto out; - - rc = sja1105_commit_pvid(priv); - if (rc) - goto out; - - for (i = 0; i < priv->ds->num_ports; i++) - sja1105_commit_subvlan_map(priv, i, subvlan_map[i]); - - if (notify) { - rc = sja1105_notify_crosschip_switches(priv); - if (rc) - goto out; - } - -out: - kfree(new_vlan); - kfree(new_retagging); - - return rc; -} - /* The TPID setting belongs to the General Parameters table, * which can only be partially reconfigured at runtime (and not the TPID). * So a switch reset is required. @@ -2810,10 +2211,8 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct sja1105_l2_lookup_params_entry *l2_lookup_params; struct sja1105_general_params_entry *general_params; struct sja1105_private *priv = ds->priv; - enum sja1105_vlan_state state; struct sja1105_table *table; struct sja1105_rule *rule; - bool want_tagging; u16 tpid, tpid2; int rc; @@ -2844,19 +2243,10 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, sp->xmit_tpid = ETH_P_SJA1105; } - if (!enabled) - state = SJA1105_VLAN_UNAWARE; - else if (priv->best_effort_vlan_filtering) - state = SJA1105_VLAN_BEST_EFFORT; - else - state = SJA1105_VLAN_FILTERING_FULL; - - if (priv->vlan_state == state) + if (priv->vlan_aware == enabled) return 0; - priv->vlan_state = state; - want_tagging = (state == SJA1105_VLAN_UNAWARE || - state == SJA1105_VLAN_BEST_EFFORT); + priv->vlan_aware = enabled; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; general_params = table->entries; @@ -2870,8 +2260,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, general_params->incl_srcpt1 = enabled; general_params->incl_srcpt0 = enabled; - want_tagging = priv->best_effort_vlan_filtering || !enabled; - /* VLAN filtering => independent VLAN learning. * No VLAN filtering (or best effort) => shared VLAN learning. * @@ -2892,132 +2280,143 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, */ table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS]; l2_lookup_params = table->entries; - l2_lookup_params->shared_learn = want_tagging; + l2_lookup_params->shared_learn = !priv->vlan_aware; - sja1105_frame_memory_partitioning(priv); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; - rc = sja1105_build_vlan_table(priv, false); - if (rc) - return rc; + rc = sja1105_commit_pvid(ds, port); + if (rc) + return rc; + } rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING); if (rc) NL_SET_ERR_MSG_MOD(extack, "Failed to change VLAN Ethertype"); - /* Switch port identification based on 802.1Q is only passable - * if we are not under a vlan_filtering bridge. 
So make sure - * the two configurations are mutually exclusive (of course, the - * user may know better, i.e. best_effort_vlan_filtering). - */ - return sja1105_setup_8021q_tagging(ds, want_tagging); + return rc; } -/* Returns number of VLANs added (0 or 1) on success, - * or a negative error code. - */ -static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid, - u16 flags, struct list_head *vlan_list) -{ - bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = flags & BRIDGE_VLAN_INFO_PVID; - struct sja1105_bridge_vlan *v; - - list_for_each_entry(v, vlan_list, list) { - if (v->port == port && v->vid == vid) { - /* Already added */ - if (v->untagged == untagged && v->pvid == pvid) - /* Nothing changed */ - return 0; - - /* It's the same VLAN, but some of the flags changed - * and the user did not bother to delete it first. - * Update it and trigger sja1105_build_vlan_table. - */ - v->untagged = untagged; - v->pvid = pvid; - return 1; - } - } +static int sja1105_vlan_add(struct sja1105_private *priv, int port, u16 vid, + u16 flags) +{ + struct sja1105_vlan_lookup_entry *vlan; + struct sja1105_table *table; + int match, rc; - v = kzalloc(sizeof(*v), GFP_KERNEL); - if (!v) { - dev_err(ds->dev, "Out of memory while storing VLAN\n"); - return -ENOMEM; + table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; + + match = sja1105_is_vlan_configured(priv, vid); + if (match < 0) { + rc = sja1105_table_resize(table, table->entry_count + 1); + if (rc) + return rc; + match = table->entry_count - 1; } - v->port = port; - v->vid = vid; - v->untagged = untagged; - v->pvid = pvid; - list_add(&v->list, vlan_list); + /* Assign pointer after the resize (it's new memory) */ + vlan = table->entries; + + vlan[match].type_entry = SJA1110_VLAN_D_TAG; + vlan[match].vlanid = vid; + vlan[match].vlan_bc |= BIT(port); + vlan[match].vmemb_port |= BIT(port); + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + vlan[match].tag_port &= ~BIT(port); + else + vlan[match].tag_port |= BIT(port); - return 1; + return sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid, + &vlan[match], true); } -/* Returns number of VLANs deleted (0 or 1) */ -static int sja1105_vlan_del_one(struct dsa_switch *ds, int port, u16 vid, - struct list_head *vlan_list) +static int sja1105_vlan_del(struct sja1105_private *priv, int port, u16 vid) { - struct sja1105_bridge_vlan *v, *n; + struct sja1105_vlan_lookup_entry *vlan; + struct sja1105_table *table; + bool keep = true; + int match, rc; - list_for_each_entry_safe(v, n, vlan_list, list) { - if (v->port == port && v->vid == vid) { - list_del(&v->list); - kfree(v); - return 1; - } - } + table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; + + match = sja1105_is_vlan_configured(priv, vid); + /* Can't delete a missing entry. */ + if (match < 0) + return 0; + + /* Assign pointer after the resize (it's new memory) */ + vlan = table->entries; + + vlan[match].vlanid = vid; + vlan[match].vlan_bc &= ~BIT(port); + vlan[match].vmemb_port &= ~BIT(port); + /* Also unset tag_port, just so we don't have a confusing bitmap + * (no practical purpose). + */ + vlan[match].tag_port &= ~BIT(port); + + /* If there's no port left as member of this VLAN, + * it's time for it to go. 
+ */ + if (!vlan[match].vmemb_port) + keep = false; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid, + &vlan[match], keep); + if (rc < 0) + return rc; + + if (!keep) + return sja1105_table_delete_entry(table, match); return 0; } -static int sja1105_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) +static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; - bool vlan_table_changed = false; + u16 flags = vlan->flags; int rc; - /* If the user wants best-effort VLAN filtering (aka vlan_filtering - * bridge plus tagging), be sure to at least deny alterations to the - * configuration done by dsa_8021q. + /* Be sure to deny alterations to the configuration done by tag_8021q. */ - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL && - vid_is_dsa_8021q(vlan->vid)) { + if (vid_is_dsa_8021q(vlan->vid)) { NL_SET_ERR_MSG_MOD(extack, "Range 1024-3071 reserved for dsa_8021q operation"); return -EBUSY; } - rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags, - &priv->bridge_vlans); - if (rc < 0) + /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + flags = 0; + + rc = sja1105_vlan_add(priv, port, vlan->vid, flags); + if (rc) return rc; - if (rc > 0) - vlan_table_changed = true; - if (!vlan_table_changed) - return 0; + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) + priv->bridge_pvid[port] = vlan->vid; - return sja1105_build_vlan_table(priv, true); + return sja1105_commit_pvid(ds, port); } -static int sja1105_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int sja1105_bridge_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct sja1105_private *priv = ds->priv; - bool vlan_table_changed = false; int rc; - rc = sja1105_vlan_del_one(ds, port, vlan->vid, &priv->bridge_vlans); - if (rc > 0) - vlan_table_changed = true; - - if (!vlan_table_changed) - return 0; + rc = sja1105_vlan_del(priv, port, vlan->vid); + if (rc) + return rc; - return sja1105_build_vlan_table(priv, true); + /* In case the pvid was deleted, make sure that untagged packets will + * be dropped. 
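+	 * sja1105_commit_pvid() re-checks whether the port is still a member
+	 * of its pvid and, if not, sets the drpuntag bit in the MAC
+	 * configuration table.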
+ */ + return sja1105_commit_pvid(ds, port); } static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, @@ -3026,29 +2425,49 @@ static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, struct sja1105_private *priv = ds->priv; int rc; - rc = sja1105_vlan_add_one(ds, port, vid, flags, &priv->dsa_8021q_vlans); - if (rc <= 0) + rc = sja1105_vlan_add(priv, port, vid, flags); + if (rc) return rc; - return sja1105_build_vlan_table(priv, true); + if (flags & BRIDGE_VLAN_INFO_PVID) + priv->tag_8021q_pvid[port] = vid; + + return sja1105_commit_pvid(ds, port); } static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { struct sja1105_private *priv = ds->priv; - int rc; - - rc = sja1105_vlan_del_one(ds, port, vid, &priv->dsa_8021q_vlans); - if (!rc) - return 0; - return sja1105_build_vlan_table(priv, true); + return sja1105_vlan_del(priv, port, vid); } -static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = { - .vlan_add = sja1105_dsa_8021q_vlan_add, - .vlan_del = sja1105_dsa_8021q_vlan_del, -}; +static int sja1105_prechangeupper(struct dsa_switch *ds, int port, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = info->info.extack; + struct net_device *upper = info->upper_dev; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *dp; + + if (is_vlan_dev(upper)) { + NL_SET_ERR_MSG_MOD(extack, "8021q uppers are not supported"); + return -EBUSY; + } + + if (netif_is_bridge_master(upper)) { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->bridge_dev && dp->bridge_dev != upper && + br_vlan_enabled(dp->bridge_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Only one VLAN-aware bridge is supported"); + return -EBUSY; + } + } + } + + return 0; +} /* The programming model for the SJA1105 switch is "all-at-once" via static * configuration tables. Some of these can be dynamically modified at runtime, @@ -3132,24 +2551,22 @@ static int sja1105_setup(struct dsa_switch *ds) * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid. */ ds->vlan_filtering_is_global = true; + ds->untag_bridge_pvid = true; + /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ + ds->num_fwd_offloading_bridges = 7; /* Advertise the 8 egress queues */ ds->num_tx_queues = SJA1105_NUM_TC; ds->mtu_enforcement_ingress = true; - - priv->best_effort_vlan_filtering = true; + ds->assisted_learning_on_cpu_port = true; rc = sja1105_devlink_setup(ds); if (rc < 0) goto out_static_config_free; - /* The DSA/switchdev model brings up switch ports in standalone mode by - * default, and that means vlan_filtering is 0 since they're not under - * a bridge, so it's safe to set up switch tagging at this time. 
- */ rtnl_lock(); - rc = sja1105_setup_8021q_tagging(ds, true); + rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q)); rtnl_unlock(); if (rc) goto out_devlink_teardown; @@ -3171,9 +2588,12 @@ out_static_config_free: static void sja1105_teardown(struct dsa_switch *ds) { struct sja1105_private *priv = ds->priv; - struct sja1105_bridge_vlan *v, *n; int port; + rtnl_lock(); + dsa_tag_8021q_unregister(ds); + rtnl_unlock(); + for (port = 0; port < ds->num_ports; port++) { struct sja1105_port *sp = &priv->ports[port]; @@ -3189,16 +2609,6 @@ static void sja1105_teardown(struct dsa_switch *ds) sja1105_tas_teardown(ds); sja1105_ptp_clock_unregister(ds); sja1105_static_config_free(&priv->static_config); - - list_for_each_entry_safe(v, n, &priv->dsa_8021q_vlans, list) { - list_del(&v->list); - kfree(v); - } - - list_for_each_entry_safe(v, n, &priv->bridge_vlans, list) { - list_del(&v->list); - kfree(v); - } } static void sja1105_port_disable(struct dsa_switch *ds, int port) @@ -3334,7 +2744,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu) new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) new_mtu += VLAN_HLEN; policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries; @@ -3638,8 +3048,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_flags = sja1105_port_bridge_flags, .port_stp_state_set = sja1105_bridge_stp_state_set, .port_vlan_filtering = sja1105_vlan_filtering, - .port_vlan_add = sja1105_vlan_add, - .port_vlan_del = sja1105_vlan_del, + .port_vlan_add = sja1105_bridge_vlan_add, + .port_vlan_del = sja1105_bridge_vlan_del, .port_mdb_add = sja1105_mdb_add, .port_mdb_del = sja1105_mdb_del, .port_hwtstamp_get = sja1105_hwtstamp_get, @@ -3654,11 +3064,12 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .cls_flower_add = sja1105_cls_flower_add, .cls_flower_del = sja1105_cls_flower_del, .cls_flower_stats = sja1105_cls_flower_stats, - .crosschip_bridge_join = sja1105_crosschip_bridge_join, - .crosschip_bridge_leave = sja1105_crosschip_bridge_leave, - .devlink_param_get = sja1105_devlink_param_get, - .devlink_param_set = sja1105_devlink_param_set, .devlink_info_get = sja1105_devlink_info_get, + .tag_8021q_vlan_add = sja1105_dsa_8021q_vlan_add, + .tag_8021q_vlan_del = sja1105_dsa_8021q_vlan_del, + .port_prechangeupper = sja1105_prechangeupper, + .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload, + .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload, }; static const struct of_device_id sja1105_dt_ids[]; @@ -3802,19 +3213,6 @@ static int sja1105_probe(struct spi_device *spi) mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); - priv->dsa_8021q_ctx = devm_kzalloc(dev, sizeof(*priv->dsa_8021q_ctx), - GFP_KERNEL); - if (!priv->dsa_8021q_ctx) - return -ENOMEM; - - priv->dsa_8021q_ctx->ops = &sja1105_dsa_8021q_ops; - priv->dsa_8021q_ctx->proto = htons(ETH_P_8021Q); - priv->dsa_8021q_ctx->ds = ds; - - INIT_LIST_HEAD(&priv->dsa_8021q_ctx->crosschip_links); - INIT_LIST_HEAD(&priv->bridge_vlans); - INIT_LIST_HEAD(&priv->dsa_8021q_vlans); - sja1105_tas_setup(ds); sja1105_flower_setup(ds); @@ -3837,7 +3235,6 @@ static int sja1105_probe(struct spi_device *spi) struct sja1105_port *sp = &priv->ports[port]; struct dsa_port *dp = dsa_to_port(ds, port); struct net_device *slave; - int subvlan; if (!dsa_is_user_port(ds, port)) continue; @@ -3858,9 +3255,6 @@ static int sja1105_probe(struct spi_device *spi) } 
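/* A rough, self-contained sketch (not taken from this patch; the example_*
 * names are hypothetical) of the tag_8021q driver API used by
 * sja1105_setup() and sja1105_teardown() above: register the protocol once,
 * unregister it once, and let the DSA core invoke the per-VLAN ops.
 */
#include <linux/dsa/8021q.h>
#include <linux/if_ether.h>
#include <linux/rtnetlink.h>
#include <net/dsa.h>

static int example_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
				      u16 vid, u16 flags)
{
	/* Program @vid on @port into hardware here */
	return 0;
}

static int example_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
{
	/* Remove @vid from @port here */
	return 0;
}

static int example_setup(struct dsa_switch *ds)
{
	int rc;

	rtnl_lock();
	rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q));
	rtnl_unlock();

	return rc;
}

static void example_teardown(struct dsa_switch *ds)
{
	rtnl_lock();
	dsa_tag_8021q_unregister(ds);
	rtnl_unlock();
}

static const struct dsa_switch_ops example_switch_ops = {
	.setup			= example_setup,
	.teardown		= example_teardown,
	.tag_8021q_vlan_add	= example_tag_8021q_vlan_add,
	.tag_8021q_vlan_del	= example_tag_8021q_vlan_del,
};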
skb_queue_head_init(&sp->xmit_queue); sp->xmit_tpid = ETH_P_SJA1105; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - sp->subvlan_map[subvlan] = VLAN_N_VID; } return 0; @@ -3884,8 +3278,10 @@ out_unregister_switch: static int sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = spi_get_drvdata(spi); + struct dsa_switch *ds = priv->ds; + + dsa_unregister_switch(ds); - dsa_unregister_switch(priv->ds); return 0; } diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index f6e13e6c6a18..ec7b65daec20 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -496,14 +496,11 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port, struct sja1105_rule *rule = sja1105_rule_find(priv, cookie); int rc; - if (priv->vlan_state == SJA1105_VLAN_UNAWARE && - key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { + if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on DMAC"); return -EOPNOTSUPP; - } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || - priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && - key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; @@ -595,14 +592,11 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port, return -ERANGE; } - if (priv->vlan_state == SJA1105_VLAN_UNAWARE && - key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { + if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only gate based on DMAC"); return -EOPNOTSUPP; - } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || - priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && - key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only gate based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 74263f8efe1a..8ef34901c2d8 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -113,6 +113,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/compat.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> @@ -131,7 +132,8 @@ static int eql_open(struct net_device *dev); static int eql_close(struct net_device *dev); -static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static netdev_tx_t eql_slave_xmit(struct sk_buff *skb, struct net_device *dev); #define eql_is_slave(dev) ((dev->flags & IFF_SLAVE) == IFF_SLAVE) @@ -170,7 +172,7 @@ static const char version[] __initconst = static const struct net_device_ops eql_netdev_ops = { .ndo_open = eql_open, .ndo_stop = eql_close, - .ndo_do_ioctl = eql_ioctl, + .ndo_siocdevprivate = eql_siocdevprivate, .ndo_start_xmit = eql_slave_xmit, }; @@ -268,25 +270,29 @@ static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *sc); static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mc); static int eql_s_master_cfg(struct net_device *dev, master_config_t __user *mc); -static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { if (cmd != 
EQL_GETMASTRCFG && cmd != EQL_GETSLAVECFG && !capable(CAP_NET_ADMIN)) return -EPERM; + if (in_compat_syscall()) /* to be implemented */ + return -EOPNOTSUPP; + switch (cmd) { case EQL_ENSLAVE: - return eql_enslave(dev, ifr->ifr_data); + return eql_enslave(dev, data); case EQL_EMANCIPATE: - return eql_emancipate(dev, ifr->ifr_data); + return eql_emancipate(dev, data); case EQL_GETSLAVECFG: - return eql_g_slave_cfg(dev, ifr->ifr_data); + return eql_g_slave_cfg(dev, data); case EQL_SETSLAVECFG: - return eql_s_slave_cfg(dev, ifr->ifr_data); + return eql_s_slave_cfg(dev, data); case EQL_GETMASTRCFG: - return eql_g_master_cfg(dev, ifr->ifr_data); + return eql_g_master_cfg(dev, data); case EQL_SETMASTRCFG: - return eql_s_master_cfg(dev, ifr->ifr_data); + return eql_s_master_cfg(dev, data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 96cc5fc36eb5..87c906e744fb 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev) return -ENOMEM; SET_NETDEV_DEV(dev, pdev); - netdev_boot_setup_check(dev); if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) { free_netdev(dev); @@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) return -ENOMEM; } SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP); pnp_set_drvdata(pdev, dev); @@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); int err; - const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"}; + static const char * const if_names[] = { + "10baseT", "AUI", "undefined", "BNC" + }; spin_lock_init(&lp->lock); @@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device) } SET_NETDEV_DEV(dev, device); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA); eisa_set_drvdata (edev, dev); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 47b4215bb93b..8d90fed5d33e 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt /* we will need locking (and refcounting) if we ever use it for more */ static LIST_HEAD(root_corkscrew_dev); -int init_module(void) +static int corkscrew_init_module(void) { int found = 0; if (debug >= 0) @@ -416,6 +416,7 @@ int init_module(void) found++; return found ? 
0 : -ENODEV; } +module_init(corkscrew_init_module); #else struct net_device *tc515_probe(int unit) diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index f66e7fb9a2bb..dd4d3c48b98d 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -252,7 +252,7 @@ static const struct net_device_ops el3_netdev_ops = { .ndo_start_xmit = el3_start_xmit, .ndo_tx_timeout = el3_tx_timeout, .ndo_get_stats = el3_get_stats, - .ndo_do_ioctl = el3_ioctl, + .ndo_eth_ioctl = el3_ioctl, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 7d7d3ffe25c3..17c16333a412 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1052,7 +1052,7 @@ static const struct net_device_ops boomrang_netdev_ops = { .ndo_tx_timeout = vortex_tx_timeout, .ndo_get_stats = vortex_get_stats, #ifdef CONFIG_PCI - .ndo_do_ioctl = vortex_ioctl, + .ndo_eth_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, @@ -1069,7 +1069,7 @@ static const struct net_device_ops vortex_netdev_ops = { .ndo_tx_timeout = vortex_tx_timeout, .ndo_get_stats = vortex_get_stats, #ifdef CONFIG_PCI - .ndo_do_ioctl = vortex_ioctl, + .ndo_eth_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index a52a3740f0c9..706bd59bf645 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -34,6 +34,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" depends on ISA && ISA_DMA_API && !PPC32 + select NETDEV_LEGACY_INIT help If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 9f4b302fd2ce..a4130e643342 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -102,6 +102,7 @@ config MCF8390 config NE2000 tristate "NE2000/NE1000 support" depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC) + select NETDEV_LEGACY_INIT if ISA select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -169,6 +170,7 @@ config STNIC config ULTRA tristate "SMC Ultra support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -186,6 +188,7 @@ config ULTRA config WD80x3 tristate "WD80*3 support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. 
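The hunks above show the two halves of the ioctl rework that runs through the rest of this series: eql's driver-private commands move from .ndo_do_ioctl to the new .ndo_siocdevprivate, which receives the user pointer directly instead of having the handler dig it out of ifr->ifr_data, while drivers whose remaining ioctls are plain MII/PHY requests (3c574, 3c59x, and most files below) move to .ndo_eth_ioctl. A minimal sketch of the resulting split, assuming a hypothetical driver "foo" — none of the foo_* names are from the tree:

#include <linux/netdevice.h>
#include <linux/phy.h>
#include <linux/sockios.h>

static int foo_private_cmd(struct net_device *dev, void __user *data)
{
	/* stub: a real driver would copy_from_user()/copy_to_user() here */
	return 0;
}

static int foo_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			      void __user *data, int cmd)
{
	/* The core hands over the user pointer itself; not touching
	 * ifr->ifr_data in the driver is what makes a compat path
	 * possible later (eql above still returns -EOPNOTSUPP for
	 * compat callers, marked "to be implemented").
	 */
	switch (cmd) {
	case SIOCDEVPRIVATE:	/* start of the driver-defined range */
		return foo_private_cmd(dev, data);
	default:
		return -EOPNOTSUPP;
	}
}

static int foo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	/* SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG go to the PHY layer */
	return phy_mii_ioctl(dev->phydev, ifr, cmd);
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_siocdevprivate = foo_siocdevprivate,	/* private commands */
	.ndo_eth_ioctl	    = foo_eth_ioctl,		/* was .ndo_do_ioctl */
};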
diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index fe6c834c422e..da1ae37a9d73 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -75,7 +75,6 @@ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -struct net_device * __init apne_probe(int unit); static int apne_probe1(struct net_device *dev, int ioaddr); static void apne_reset_8390(struct net_device *dev); @@ -120,7 +119,7 @@ static u32 apne_msg_enable; module_param_named(msg_enable, apne_msg_enable, uint, 0444); MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)"); -struct net_device * __init apne_probe(int unit) +static struct net_device * __init apne_probe(void) { struct net_device *dev; struct ei_device *ei_local; @@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit) dev = alloc_ei_netdev(); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } ei_local = netdev_priv(dev); ei_local->msg_enable = apne_msg_enable; @@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef MODULE static struct net_device *apne_dev; static int __init apne_module_init(void) { - apne_dev = apne_probe(-1); + apne_dev = apne_probe(); return PTR_ERR_OR_ZERO(apne_dev); } @@ -579,7 +573,6 @@ static void __exit apne_module_exit(void) } module_init(apne_module_init); module_exit(apne_module_exit); -#endif static int init_pcmcia(void) { diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 172947fc051a..6c6bdd5913ec 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev) return (struct ax_device *)(ei_local + 1); } +void ax_NS8390_reinit(struct net_device *dev) +{ + ax_NS8390_init(dev, 1); +} + +EXPORT_SYMBOL_GPL(ax_NS8390_reinit); + /* * ax_initial_check * @@ -635,7 +642,7 @@ static void ax_eeprom_register_write(struct eeprom_93cx6 *eeprom) static const struct net_device_ops ax_netdev_ops = { .ndo_open = ax_open, .ndo_stop = ax_close, - .ndo_do_ioctl = ax_ioctl, + .ndo_eth_ioctl = ax_ioctl, .ndo_start_xmit = ax_ei_start_xmit, .ndo_tx_timeout = ax_ei_tx_timeout, diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 8c321dfc7b3b..3c370e686ec3 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -128,7 +128,7 @@ static inline struct axnet_dev *PRIV(struct net_device *dev) static const struct net_device_ops axnet_netdev_ops = { .ndo_open = axnet_open, .ndo_stop = axnet_close, - .ndo_do_ioctl = axnet_ioctl, + .ndo_eth_ioctl = axnet_ioctl, .ndo_start_xmit = axnet_start_xmit, .ndo_tx_timeout = axnet_tx_timeout, .ndo_get_stats = get_stats, diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index e9756d0ea5b8..53660bc8d6ff 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -923,7 +923,7 @@ static void __init ne_add_devices(void) } #ifdef MODULE -int __init init_module(void) +static int __init ne_init(void) { int retval; ne_add_devices(); @@ -940,6 +940,7 @@ int __init init_module(void) ne_loop_rm_unreg(0); return retval; } +module_init(ne_init); #else /* MODULE */ static int __init ne_init(void) { @@ -951,6 +952,7 @@ static int __init ne_init(void) } module_init(ne_init); +#ifdef CONFIG_NETDEV_LEGACY_INIT struct net_device * __init 
ne_probe(int unit) { int this_dev; @@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit) return ERR_PTR(-ENODEV); } +#endif #endif /* MODULE */ static void __exit ne_exit(void) diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index cac036706382..96ad72abd373 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -223,7 +223,7 @@ static const struct net_device_ops pcnet_netdev_ops = { .ndo_set_config = set_config, .ndo_start_xmit = ei_start_xmit, .ndo_get_stats = ei_get_stats, - .ndo_do_ioctl = ei_ioctl, + .ndo_eth_ioctl = ei_ioctl, .ndo_set_rx_mode = ei_set_multicast_list, .ndo_tx_timeout = ei_tx_timeout, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 1d8ed7357b7f..0890fa493f70 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count, /* We know skbuffs are padded to at least word alignment. */ insw(ioaddr + IOPD, buf, (count+1)>>1); } - static void ultra_pio_output(struct net_device *dev, int count, const unsigned char *buf, const int start_page) { @@ -572,8 +571,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. */ -int __init -init_module(void) +static int __init ultra_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -600,6 +598,7 @@ init_module(void) return 0; return -ENXIO; } +module_init(ultra_init_module); static void cleanup_card(struct net_device *dev) { @@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit ultra_cleanup_module(void) { int this_dev; @@ -627,4 +625,5 @@ cleanup_module(void) } } } +module_exit(ultra_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index c834123560f1..263a942d81fa 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -519,7 +519,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. 
*/ -int __init init_module(void) +static int __init wd_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -548,6 +548,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(wd_init_module); static void cleanup_card(struct net_device *dev) { @@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit wd_cleanup_module(void) { int this_dev; @@ -570,4 +570,5 @@ cleanup_module(void) } } } +module_exit(wd_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c index e2c963821ffe..fe7a74707aa4 100644 --- a/drivers/net/ethernet/8390/xsurf100.c +++ b/drivers/net/ethernet/8390/xsurf100.c @@ -22,8 +22,6 @@ #define XS100_8390_DATA_WRITE32_BASE 0x0C80 #define XS100_8390_DATA_AREA_SIZE 0x80 -#define __NS8390_init ax_NS8390_init - /* force unsigned long back to 'void __iomem *' */ #define ax_convert_addr(_a) ((void __force __iomem *)(_a)) @@ -42,10 +40,7 @@ /* Ensure we have our RCR base value */ #define AX88796_PLATFORM -static unsigned char version[] = - "ax88796.c: Copyright 2005,2007 Simtec Electronics\n"; - -#include "lib8390.c" +#include "8390.h" /* from ne.c */ #define NE_CMD EI_SHIFT(0x00) @@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count, if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */ netdev_warn(dev, "timeout waiting for Tx RDC.\n"); ei_local->reset_8390(dev); - ax_NS8390_init(dev, 1); + ax_NS8390_reinit(dev); break; } } diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index b8e771c2bc40..c4ecf4fcadf8 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1179,8 +1179,8 @@ static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr) return owl_emac_setup_frame_xmit(netdev_priv(netdev)); } -static int owl_emac_ndo_do_ioctl(struct net_device *netdev, - struct ifreq *req, int cmd) +static int owl_emac_ndo_eth_ioctl(struct net_device *netdev, + struct ifreq *req, int cmd) { if (!netif_running(netdev)) return -EINVAL; @@ -1224,7 +1224,7 @@ static const struct net_device_ops owl_emac_netdev_ops = { .ndo_set_rx_mode = owl_emac_ndo_set_rx_mode, .ndo_set_mac_address = owl_emac_ndo_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = owl_emac_ndo_do_ioctl, + .ndo_eth_ioctl = owl_emac_ndo_eth_ioctl, .ndo_tx_timeout = owl_emac_ndo_tx_timeout, .ndo_get_stats = owl_emac_ndo_get_stats, }; diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 7965e5e3c985..e0f6cc910bd2 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -625,7 +625,7 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = tx_timeout, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef VLAN_SUPPORT diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 41f8821f792d..920633161174 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3882,7 +3882,7 @@ static const struct net_device_ops et131x_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_get_stats = et131x_stats, - .ndo_do_ioctl = 
phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, }; static int et131x_pci_setup(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index f99ae317c188..037baea1c738 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -774,7 +774,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_tx_timeout = emac_timeout, .ndo_set_rx_mode = emac_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index d0b0609bbe23..c6a3abec86f5 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -46,6 +46,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -132,6 +133,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 9cac5aa75a73..92e4246dc359 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1729,7 +1729,7 @@ static const struct net_device_ops amd8111e_netdev_ops = { .ndo_set_rx_mode = amd8111e_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = amd8111e_set_mac_address, - .ndo_do_ioctl = amd8111e_ioctl, + .ndo_eth_ioctl = amd8111e_ioctl, .ndo_change_mtu = amd8111e_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = amd8111e_poll, diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index 36f54d13a2eb..9d2f49fd945e 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len ) } -struct net_device * __init atarilance_probe(int unit) +struct net_device * __init atarilance_probe(void) { int i; static int found; @@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } for( i = 0; i < N_LANCE_ADDR; ++i ) { if (lance_probe1( dev, &lance_addr_list[i] )) { @@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr ) return 0; } - -#ifdef MODULE static struct net_device *atarilance_dev; static int __init atarilance_module_init(void) { - atarilance_dev = atarilance_probe(-1); + atarilance_dev = atarilance_probe(); return PTR_ERR_OR_ZERO(atarilance_dev); } @@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void) } module_init(atarilance_module_init); module_exit(atarilance_module_exit); -#endif /* MODULE */ diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 19e195420e24..9c1636222b99 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ 
-1051,7 +1051,7 @@ static const struct net_device_ops au1000_netdev_ops = { .ndo_stop = au1000_close, .ndo_start_xmit = au1000_tx, .ndo_set_rx_mode = au1000_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = au1000_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index 2178e6b89dbd..945bf1d87507 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)"); MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)"); MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)"); -int __init init_module(void) +static int __init lance_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -356,6 +356,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(lance_init_module); static void cleanup_card(struct net_device *dev) { @@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev) kfree(lp); } -void __exit cleanup_module(void) +static void __exit lance_cleanup_module(void) { int this_dev; @@ -381,6 +382,7 @@ void __exit cleanup_module(void) } } } +module_exit(lance_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 3f2e4cdd0b83..da97fccea9ea 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = { }; /* Initialise the one and only on-board 7990 */ -struct net_device * __init mvme147lance_probe(int unit) +static struct net_device * __init mvme147lance_probe(void) { struct net_device *dev; static int called; @@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) - sprintf(dev->name, "eth%d", unit); - /* Fill the dev fields */ dev->base_addr = (unsigned long)MVME147_LANCE_BASE; dev->netdev_ops = &lance_netdev_ops; @@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev) return 0; } -#ifdef MODULE MODULE_LICENSE("GPL"); static struct net_device *dev_mvme147_lance; -int __init init_module(void) +static int __init m147lance_init(void) { - dev_mvme147_lance = mvme147lance_probe(-1); + dev_mvme147_lance = mvme147lance_probe(); return PTR_ERR_OR_ZERO(dev_mvme147_lance); } +module_init(m147lance_init); -void __exit cleanup_module(void) +static void __exit m147lance_exit(void) { struct m147lance_private *lp = netdev_priv(dev_mvme147_lance); unregister_netdev(dev_mvme147_lance); free_pages(lp->ram, 3); free_netdev(dev_mvme147_lance); } - -#endif /* MODULE */ +module_exit(m147lance_exit); diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 5c1cfb0c4a42..b5df7ad5a83f 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)"); MODULE_PARM_DESC(io, "ni6510 I/O base address"); MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)"); -int __init init_module(void) +static int __init ni65_init_module(void) { dev_ni65 = ni65_probe(-1); return PTR_ERR_OR_ZERO(dev_ni65); } +module_init(ni65_init_module); -void __exit cleanup_module(void) +static void __exit ni65_cleanup_module(void) { 
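The conversions running through 3c515, ne.c, smc-ultra, wd, lance, mvme147, and ni65 in these hunks all follow one mechanical pattern: the legacy implicit init_module()/cleanup_module() entry points become static, named functions registered via module_init()/module_exit(), and probes that no longer take a boot-time unit number drop their netdev_boot_setup_check() calls and #ifdef MODULE guards. In outline, under the same assumptions as above (foo_* names illustrative, probe body stubbed):

#include <linux/err.h>
#include <linux/module.h>
#include <linux/netdevice.h>

static struct net_device *foo_dev;

static struct net_device * __init foo_probe(void)
{
	/* stub: a real probe allocates with alloc_etherdev(),
	 * detects the hardware, and registers the netdev
	 */
	return ERR_PTR(-ENODEV);
}

static int __init foo_init_module(void)
{
	foo_dev = foo_probe();
	return PTR_ERR_OR_ZERO(foo_dev);
}
module_init(foo_init_module);	/* replaces implicit init_module() */

static void __exit foo_cleanup_module(void)
{
	unregister_netdev(foo_dev);
	free_netdev(foo_dev);
}
module_exit(foo_cleanup_module);	/* replaces implicit cleanup_module() */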
unregister_netdev(dev_ni65); cleanup_card(dev_ni65); free_netdev(dev_ni65); } +module_exit(ni65_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 4100ab07e6b7..70d76fdb9f56 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1572,7 +1572,7 @@ static const struct net_device_ops pcnet32_netdev_ops = { .ndo_tx_timeout = pcnet32_tx_timeout, .ndo_get_stats = pcnet32_get_stats, .ndo_set_rx_mode = pcnet32_set_multicast_list, - .ndo_do_ioctl = pcnet32_ioctl, + .ndo_eth_ioctl = pcnet32_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index f8d7a9387a56..4a845bc071b2 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev ); /************************* End of Prototypes **************************/ -struct net_device * __init sun3lance_probe(int unit) +static struct net_device * __init sun3lance_probe(void) { struct net_device *dev; static int found; @@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } if (!lance_probe(dev)) goto out; @@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev ) } -#ifdef MODULE - static struct net_device *sun3lance_dev; -int __init init_module(void) +static int __init sun3lance_init(void) { - sun3lance_dev = sun3lance_probe(-1); + sun3lance_dev = sun3lance_probe(); return PTR_ERR_OR_ZERO(sun3lance_dev); } +module_init(sun3lance_init); -void __exit cleanup_module(void) +static void __exit sun3lance_cleanup(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 @@ -942,6 +937,4 @@ void __exit cleanup_module(void) #endif free_netdev(sun3lance_dev); } - -#endif /* MODULE */ - +module_exit(sun3lance_cleanup); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 4f714f874c4f..17a585adfb49 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2284,7 +2284,7 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_rx_mode = xgbe_set_rx_mode, .ndo_set_mac_address = xgbe_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = xgbe_ioctl, + .ndo_eth_ioctl = xgbe_ioctl, .ndo_change_mtu = xgbe_change_mtu, .ndo_tx_timeout = xgbe_tx_timeout, .ndo_get_stats64 = xgbe_get_stats64, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 4af0cd9530de..e22935ce9573 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -421,7 +421,7 @@ static const struct net_device_ops aq_ndev_ops = { .ndo_change_mtu = aq_ndev_change_mtu, .ndo_set_mac_address = aq_ndev_set_mac_address, .ndo_set_features = aq_ndev_set_features, - .ndo_do_ioctl = aq_ndev_ioctl, + .ndo_eth_ioctl = aq_ndev_ioctl, .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid, .ndo_setup_tc = aq_ndo_setup_tc, diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 
67b8113a2b53..38c288ec9059 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -844,7 +844,7 @@ static const struct net_device_ops arc_emac_netdev_ops = { .ndo_set_mac_address = arc_emac_set_address, .ndo_get_stats = arc_emac_stats, .ndo_set_rx_mode = arc_emac_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = arc_emac_poll_controller, #endif diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 1ba81b1eb6fd..02ae98aabf91 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1851,7 +1851,7 @@ static const struct net_device_ops ag71xx_netdev_ops = { .ndo_open = ag71xx_open, .ndo_stop = ag71xx_stop, .ndo_start_xmit = ag71xx_hard_start_xmit, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = ag71xx_tx_timeout, .ndo_change_mtu = ag71xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 11ef1fbe7aee..4ea157efca86 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1701,7 +1701,7 @@ static const struct net_device_ops alx_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = alx_set_mac_address, .ndo_change_mtu = alx_change_mtu, - .ndo_do_ioctl = alx_ioctl, + .ndo_eth_ioctl = alx_ioctl, .ndo_tx_timeout = alx_tx_timeout, .ndo_fix_features = alx_fix_features, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 1c6246a5dc22..3b51b172b317 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2609,7 +2609,7 @@ static const struct net_device_ops atl1c_netdev_ops = { .ndo_change_mtu = atl1c_change_mtu, .ndo_fix_features = atl1c_fix_features, .ndo_set_features = atl1c_set_features, - .ndo_do_ioctl = atl1c_ioctl, + .ndo_eth_ioctl = atl1c_ioctl, .ndo_tx_timeout = atl1c_tx_timeout, .ndo_get_stats = atl1c_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 2eb0a2ab69f6..753973ac922e 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -2247,7 +2247,7 @@ static const struct net_device_ops atl1e_netdev_ops = { .ndo_fix_features = atl1e_fix_features, .ndo_set_features = atl1e_set_features, .ndo_change_mtu = atl1e_change_mtu, - .ndo_do_ioctl = atl1e_ioctl, + .ndo_eth_ioctl = atl1e_ioctl, .ndo_tx_timeout = atl1e_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1e_netpoll, diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index c67201a13cf5..68f6c0bbd945 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2885,7 +2885,7 @@ static const struct net_device_ops atl1_netdev_ops = { .ndo_change_mtu = atl1_change_mtu, .ndo_fix_features = atlx_fix_features, .ndo_set_features = atlx_set_features, - .ndo_do_ioctl = atlx_ioctl, + .ndo_eth_ioctl = atlx_ioctl, .ndo_tx_timeout = atlx_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1_poll_controller, diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c 
b/drivers/net/ethernet/atheros/atlx/atl2.c index 0cc0db04c27d..b69298ddb647 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1293,7 +1293,7 @@ static const struct net_device_ops atl2_netdev_ops = { .ndo_change_mtu = atl2_change_mtu, .ndo_fix_features = atl2_fix_features, .ndo_set_features = atl2_set_features, - .ndo_do_ioctl = atl2_ioctl, + .ndo_eth_ioctl = atl2_ioctl, .ndo_tx_timeout = atl2_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl2_poll_controller, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index ad2655efe423..fa784953c601 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2198,7 +2198,7 @@ static const struct net_device_ops b44_netdev_ops = { .ndo_set_rx_mode = b44_set_rx_mode, .ndo_set_mac_address = b44_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = b44_ioctl, + .ndo_eth_ioctl = b44_ioctl, .ndo_tx_timeout = b44_tx_timeout, .ndo_change_mtu = b44_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 977f097fc7bf..d56886300ecf 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1699,7 +1699,7 @@ static const struct net_device_ops bcm_enet_ops = { .ndo_start_xmit = bcm_enet_start_xmit, .ndo_set_mac_address = bcm_enet_set_mac_address, .ndo_set_rx_mode = bcm_enet_set_multicast_list, - .ndo_do_ioctl = bcm_enet_ioctl, + .ndo_eth_ioctl = bcm_enet_ioctl, .ndo_change_mtu = bcm_enet_change_mtu, }; @@ -2446,7 +2446,7 @@ static const struct net_device_ops bcm_enetsw_ops = { .ndo_stop = bcm_enetsw_stop, .ndo_start_xmit = bcm_enet_start_xmit, .ndo_change_mtu = bcm_enet_change_mtu, - .ndo_do_ioctl = bcm_enetsw_ioctl, + .ndo_eth_ioctl = bcm_enetsw_ioctl, }; @@ -2649,7 +2649,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (!res_mem || irq_rx < 0) return -ENODEV; - ret = 0; dev = alloc_etherdev(sizeof(*priv)); if (!dev) return -ENOMEM; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 075f6e146b29..fe4d99abd548 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1263,7 +1263,7 @@ static const struct net_device_ops bgmac_netdev_ops = { .ndo_set_rx_mode = bgmac_set_rx_mode, .ndo_set_mac_address = bgmac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = bgmac_change_mtu, }; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index bee6cfad9fc6..89ee1c0e9c79 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8546,7 +8546,7 @@ static const struct net_device_ops bnx2_netdev_ops = { .ndo_stop = bnx2_close, .ndo_get_stats64 = bnx2_get_stats64, .ndo_set_rx_mode = bnx2_set_rx_mode, - .ndo_do_ioctl = bnx2_ioctl, + .ndo_eth_ioctl = bnx2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = bnx2_change_mac_addr, .ndo_change_mtu = bnx2_change_mtu, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2acbc73dcd18..6d98134913cd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13048,7 +13048,7 @@ static const struct 
net_device_ops bnx2x_netdev_ops = { .ndo_set_rx_mode = bnx2x_set_rx_mode, .ndo_set_mac_address = bnx2x_change_mac_addr, .ndo_validate_addr = bnx2x_validate_addr, - .ndo_do_ioctl = bnx2x_ioctl, + .ndo_eth_ioctl = bnx2x_ioctl, .ndo_change_mtu = bnx2x_change_mtu, .ndo_fix_features = bnx2x_fix_features, .ndo_set_features = bnx2x_set_features, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 27943b0446c2..f255fd0b16db 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1858,7 +1858,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) { int i; int first_queue_query_index, num_queues_req; - dma_addr_t cur_data_offset; struct stats_query_entry *cur_query_entry; u8 stats_count = 0; bool is_fcoe = false; @@ -1879,10 +1878,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) BNX2X_NUM_ETH_QUEUES(bp), is_fcoe, first_queue_query_index, first_queue_query_index + num_queues_req); - cur_data_offset = bp->fw_stats_data_mapping + - offsetof(struct bnx2x_fw_stats_data, queue_stats) + - num_queues_req * sizeof(struct per_queue_stats); - cur_query_entry = &bp->fw_stats_req-> query[first_queue_query_index + num_queues_req]; @@ -1933,7 +1928,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) cur_query_entry->funcID, j, cur_query_entry->index); cur_query_entry++; - cur_data_offset += sizeof(struct per_queue_stats); stats_count++; /* all stats are coalesced to the leading queue */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 89606587b156..865fcb8cf29f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -277,6 +277,8 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION, ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST, + ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP, + ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT, }; static struct workqueue_struct *bnxt_pf_wq; @@ -2042,6 +2044,19 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id) return INVALID_HW_RING_ID; } +static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) +{ + switch (BNXT_EVENT_ERROR_REPORT_TYPE(data1)) { + case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL: + netdev_err(bp->dev, "1PPS: Received invalid signal on pin%lu from the external source. 
Please fix the signal and reconfigure the pin\n", + BNXT_EVENT_INVALID_SIGNAL_DATA(data2)); + break; + default: + netdev_err(bp->dev, "FW reported unknown error type\n"); + break; + } +} + #define BNXT_GET_EVENT_PORT(data) \ ((data) & \ ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK) @@ -2202,6 +2217,14 @@ static int bnxt_async_event_process(struct bnxt *bp, } goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP: { + bnxt_ptp_pps_event(bp, data1, data2); + goto async_event_process_exit; + } + case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT: { + bnxt_event_error_report(bp, data1, data2); + goto async_event_process_exit; + } default: goto async_event_process_exit; } @@ -3140,6 +3163,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) return 0; } +static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr) +{ + kfree(cpr->cp_desc_ring); + cpr->cp_desc_ring = NULL; + kfree(cpr->cp_desc_mapping); + cpr->cp_desc_mapping = NULL; +} + +static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n) +{ + cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL); + if (!cpr->cp_desc_ring) + return -ENOMEM; + cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping), + GFP_KERNEL); + if (!cpr->cp_desc_mapping) + return -ENOMEM; + return 0; +} + +static void bnxt_free_all_cp_arrays(struct bnxt *bp) +{ + int i; + + if (!bp->bnapi) + return; + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + + if (!bnapi) + continue; + bnxt_free_cp_arrays(&bnapi->cp_ring); + } +} + +static int bnxt_alloc_all_cp_arrays(struct bnxt *bp) +{ + int i, n = bp->cp_nr_pages; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + int rc; + + if (!bnapi) + continue; + rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n); + if (rc) + return rc; + } + return 0; +} + static void bnxt_free_cp_rings(struct bnxt *bp) { int i; @@ -3167,6 +3242,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp) if (cpr2) { ring = &cpr2->cp_ring_struct; bnxt_free_ring(bp, &ring->ring_mem); + bnxt_free_cp_arrays(cpr2); kfree(cpr2); cpr->cp_ring_arr[j] = NULL; } @@ -3185,6 +3261,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) if (!cpr) return NULL; + rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages); + if (rc) { + bnxt_free_cp_arrays(cpr); + kfree(cpr); + return NULL; + } ring = &cpr->cp_ring_struct; rmem = &ring->ring_mem; rmem->nr_pages = bp->cp_nr_pages; @@ -3195,6 +3277,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) rc = bnxt_alloc_ring(bp, rmem); if (rc) { bnxt_free_ring(bp, rmem); + bnxt_free_cp_arrays(cpr); kfree(cpr); cpr = NULL; } @@ -3627,9 +3710,15 @@ void bnxt_set_ring_params(struct bnxt *bp) if (jumbo_factor > agg_factor) agg_factor = jumbo_factor; } - agg_ring_size = ring_size * agg_factor; + if (agg_factor) { + if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) { + ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n", + bp->rx_ring_size, ring_size); + bp->rx_ring_size = ring_size; + } + agg_ring_size = ring_size * agg_factor; - if (agg_ring_size) { bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size, RX_DESC_CNT); if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) { @@ -4230,6 +4319,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_tx_rings(bp); bnxt_free_rx_rings(bp); bnxt_free_cp_rings(bp); + bnxt_free_all_cp_arrays(bp); 
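The bnxt_alloc_cp_arrays()/bnxt_free_cp_arrays() pair added above replaces the fixed cp_desc_ring[MAX_CP_PAGES] and cp_desc_mapping[MAX_CP_PAGES] members with pointer arrays sized at runtime from bp->cp_nr_pages, which is what lets MAX_CP_PAGES grow (8 to 16, 64 to 128 in the header hunk below) without inflating every ring. Reduced to its essentials — foo_* names and the void* element type are illustrative, the real code uses struct tx_cmp/struct nqe_cn:

#include <linux/slab.h>
#include <linux/types.h>

struct foo_cp_ring {
	void **desc_pages;	   /* was: struct tx_cmp *cp_desc_ring[MAX_CP_PAGES] */
	dma_addr_t *desc_mapping;  /* was: dma_addr_t cp_desc_mapping[MAX_CP_PAGES] */
};

static void foo_free_cp_arrays(struct foo_cp_ring *cpr)
{
	kfree(cpr->desc_pages);
	cpr->desc_pages = NULL;
	kfree(cpr->desc_mapping);
	cpr->desc_mapping = NULL;
}

static int foo_alloc_cp_arrays(struct foo_cp_ring *cpr, int n)
{
	cpr->desc_pages = kcalloc(n, sizeof(*cpr->desc_pages), GFP_KERNEL);
	if (!cpr->desc_pages)
		return -ENOMEM;
	cpr->desc_mapping = kcalloc(n, sizeof(*cpr->desc_mapping), GFP_KERNEL);
	if (!cpr->desc_mapping)
		return -ENOMEM;	/* caller unwinds via foo_free_cp_arrays(), as bnxt does */
	return 0;
}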
bnxt_free_ntp_fltrs(bp, irq_re_init); if (irq_re_init) { bnxt_free_ring_stats(bp); @@ -4350,6 +4440,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) goto alloc_mem_err; } + rc = bnxt_alloc_all_cp_arrays(bp); + if (rc) + goto alloc_mem_err; + bnxt_init_ring_struct(bp); rc = bnxt_alloc_rx_rings(bp); @@ -7495,9 +7589,14 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) rc = -ENODEV; goto no_ptp; } - return 0; + rc = bnxt_ptp_init(bp); + if (!rc) + return 0; + + netdev_warn(bp->dev, "PTP initialization failed.\n"); no_ptp: + bnxt_ptp_clear(bp); kfree(ptp); bp->ptp_cfg = NULL; return rc; @@ -7540,6 +7639,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) flags_ext = le32_to_cpu(resp->flags_ext); if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED) bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED; + if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED)) + bp->fw_cap |= BNXT_FW_CAP_PTP_PPS; bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && @@ -7577,6 +7678,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { __bnxt_hwrm_ptp_qcfg(bp); } else { + bnxt_ptp_clear(bp); kfree(bp->ptp_cfg); bp->ptp_cfg = NULL; } @@ -10277,15 +10379,9 @@ static int bnxt_open(struct net_device *dev) if (rc) return rc; - if (bnxt_ptp_init(bp)) { - netdev_warn(dev, "PTP initialization failed.\n"); - kfree(bp->ptp_cfg); - bp->ptp_cfg = NULL; - } rc = __bnxt_open_nic(bp, true, true); if (rc) { bnxt_hwrm_if_change(bp, false); - bnxt_ptp_clear(bp); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -10376,7 +10472,6 @@ static int bnxt_close(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - bnxt_ptp_clear(bp); bnxt_hwmon_close(bp); bnxt_close_nic(bp, true, true); bnxt_hwrm_shutdown_link(bp); @@ -11363,7 +11458,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp) bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); } - bnxt_ptp_clear(bp); __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); @@ -11399,13 +11493,20 @@ static bool is_bnxt_fw_ok(struct bnxt *bp) static void bnxt_force_fw_reset(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; u32 wait_dsecs; if (!test_bit(BNXT_STATE_OPEN, &bp->state) || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return; - set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (ptp) { + spin_lock_bh(&ptp->ptp_lock); + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + spin_unlock_bh(&ptp->ptp_lock); + } else { + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + } bnxt_fw_reset_close(bp); wait_dsecs = fw_health->master_func_wait_dsecs; if (fw_health->master) { @@ -11461,9 +11562,16 @@ void bnxt_fw_reset(struct bnxt *bp) bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state) && !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; int n = 0, tmo; - set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (ptp) { + spin_lock_bh(&ptp->ptp_lock); + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + spin_unlock_bh(&ptp->ptp_lock); + } else { + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + } if (bp->pf.active_vfs && !test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) n = bnxt_get_registered_vfs(bp); @@ -12135,6 +12243,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_reenable_sriov(bp); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); + 
bnxt_ptp_reapply_pps(bp); bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); @@ -12666,7 +12775,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_stop = bnxt_close, .ndo_get_stats64 = bnxt_get_stats64, .ndo_set_rx_mode = bnxt_set_rx_mode, - .ndo_do_ioctl = bnxt_ioctl, + .ndo_eth_ioctl = bnxt_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = bnxt_change_mac_addr, .ndo_change_mtu = bnxt_change_mtu, @@ -12705,6 +12814,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); + bnxt_ptp_clear(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); @@ -13317,6 +13427,7 @@ init_err_pci_clean: bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); bnxt_ethtool_free(bp); + bnxt_ptp_clear(bp); kfree(bp->ptp_cfg); bp->ptp_cfg = NULL; kfree(bp->fw_health); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bcf8d00b8c80..9c3324e76ff7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -496,6 +496,16 @@ struct rx_tpa_end_cmp_ext { !!((data1) & \ ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED) +#define BNXT_EVENT_ERROR_REPORT_TYPE(data1) \ + (((data1) & \ + ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK) >>\ + ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT) + +#define BNXT_EVENT_INVALID_SIGNAL_DATA(data2) \ + (((data2) & \ + ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK) >>\ + ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT) + struct nqe_cn { __le16 type; #define NQ_CN_TYPE_MASK 0x3fUL @@ -586,15 +596,17 @@ struct nqe_cn { #define MAX_TPA_SEGS_P5 0x3f #if (BNXT_PAGE_SHIFT == 16) -#define MAX_RX_PAGES 1 +#define MAX_RX_PAGES_AGG_ENA 1 +#define MAX_RX_PAGES 4 #define MAX_RX_AGG_PAGES 4 #define MAX_TX_PAGES 1 -#define MAX_CP_PAGES 8 +#define MAX_CP_PAGES 16 #else -#define MAX_RX_PAGES 8 +#define MAX_RX_PAGES_AGG_ENA 8 +#define MAX_RX_PAGES 32 #define MAX_RX_AGG_PAGES 32 #define MAX_TX_PAGES 8 -#define MAX_CP_PAGES 64 +#define MAX_CP_PAGES 128 #endif #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd)) @@ -612,6 +624,7 @@ struct nqe_cn { #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT) #define BNXT_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1) +#define BNXT_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1) #define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) @@ -962,11 +975,11 @@ struct bnxt_cp_ring_info { struct dim dim; union { - struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; - struct nqe_cn *nq_desc_ring[MAX_CP_PAGES]; + struct tx_cmp **cp_desc_ring; + struct nqe_cn **nq_desc_ring; }; - dma_addr_t cp_desc_mapping[MAX_CP_PAGES]; + dma_addr_t *cp_desc_mapping; struct bnxt_stats_mem stats; u32 hw_stats_ctx_id; @@ -1887,6 +1900,7 @@ struct bnxt { #define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000 #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 + #define BNXT_FW_CAP_PTP_PPS 0x10000000 #define BNXT_FW_CAP_RING_MONITOR 0x40000000 #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 786ca51e669b..485252d12245 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -768,8 +768,13 @@ static void bnxt_get_ringparam(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); - ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; - ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + if (bp->flags & BNXT_FLAG_AGG_RINGS) { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + } else { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; + ering->rx_jumbo_max_pending = 0; + } ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT; ering->rx_pending = bp->rx_ring_size; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index ec381c2423b8..7f55ebbfd04b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -55,16 +55,19 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info, } /* Caller holds ptp_lock */ -static u64 bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts) +static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts, + u64 *ns) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - u64 ns; + + if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + return -EIO; ptp_read_system_prets(sts); - ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); ptp_read_system_postts(sts); - ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; - return ns; + *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; + return 0; } static void bnxt_ptp_get_current_time(struct bnxt *bp) @@ -75,7 +78,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp) return; spin_lock_bh(&ptp->ptp_lock); WRITE_ONCE(ptp->old_time, ptp->current_time); - ptp->current_time = bnxt_refclk_read(bp, NULL); + bnxt_refclk_read(bp, NULL, &ptp->current_time); spin_unlock_bh(&ptp->ptp_lock); } @@ -108,9 +111,14 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info, struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); u64 ns, cycles; + int rc; spin_lock_bh(&ptp->ptp_lock); - cycles = bnxt_refclk_read(ptp->bp, sts); + rc = bnxt_refclk_read(ptp->bp, sts, &cycles); + if (rc) { + spin_unlock_bh(&ptp->ptp_lock); + return rc; + } ns = timecounter_cyc2time(&ptp->tc, cycles); spin_unlock_bh(&ptp->ptp_lock); *ts = ns_to_timespec64(ns); @@ -147,10 +155,207 @@ static int bnxt_ptp_adjfreq(struct ptp_clock_info *ptp_info, s32 ppb) return rc; } -static int bnxt_ptp_enable(struct ptp_clock_info *ptp, +void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct ptp_clock_event event; + u64 ns, pps_ts; + + pps_ts = EVENT_PPS_TS(data2, data1); + spin_lock_bh(&ptp->ptp_lock); + ns = timecounter_cyc2time(&ptp->tc, pps_ts); + spin_unlock_bh(&ptp->ptp_lock); + + switch (EVENT_DATA2_PPS_EVENT_TYPE(data2)) { + case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_INTERNAL: + event.pps_times.ts_real = ns_to_timespec64(ns); + event.type = PTP_CLOCK_PPSUSR; + event.index = EVENT_DATA2_PPS_PIN_NUM(data2); + break; + case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL: + event.timestamp = ns; + event.type = PTP_CLOCK_EXTTS; + event.index = EVENT_DATA2_PPS_PIN_NUM(data2); + break; + } + + ptp_clock_event(bp->ptp_cfg->ptp_clock, &event); +} + +static int bnxt_ptp_cfg_pin(struct bnxt *bp, u8 pin, u8 usage) +{ + struct hwrm_func_ptp_pin_cfg_input 
req = {0}; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u8 state = usage != BNXT_PPS_PIN_NONE; + u8 *pin_state, *pin_usg; + u32 enables; + int rc; + + if (!TSIO_PIN_VALID(pin)) { + netdev_err(ptp->bp->dev, "1PPS: Invalid pin. Check pin-function configuration\n"); + return -EOPNOTSUPP; + } + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_PTP_PIN_CFG, -1, -1); + enables = (FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_STATE | + FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_USAGE) << (pin * 2); + req.enables = cpu_to_le32(enables); + + pin_state = &req.pin0_state; + pin_usg = &req.pin0_usage; + + *(pin_state + (pin * 2)) = state; + *(pin_usg + (pin * 2)) = usage; + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + return rc; + + ptp->pps_info.pins[pin].usage = usage; + ptp->pps_info.pins[pin].state = state; + + return 0; +} + +static int bnxt_ptp_cfg_event(struct bnxt *bp, u8 event) +{ + struct hwrm_func_ptp_cfg_input req = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_PTP_CFG, -1, -1); + req.enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_PPS_EVENT); + req.ptp_pps_event = event; + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + +void bnxt_ptp_reapply_pps(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct bnxt_pps *pps; + u32 pin = 0; + int rc; + + if (!ptp || !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) || + !(ptp->ptp_info.pin_config)) + return; + pps = &ptp->pps_info; + for (pin = 0; pin < BNXT_MAX_TSIO_PINS; pin++) { + if (pps->pins[pin].state) { + rc = bnxt_ptp_cfg_pin(bp, pin, pps->pins[pin].usage); + if (!rc && pps->pins[pin].event) + rc = bnxt_ptp_cfg_event(bp, + pps->pins[pin].event); + if (rc) + netdev_err(bp->dev, "1PPS: Failed to configure pin%d\n", + pin); + } + } +} + +static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns, + u64 *cycles_delta) +{ + u64 cycles_now; + u64 nsec_now, nsec_delta; + int rc; + + spin_lock_bh(&ptp->ptp_lock); + rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now); + if (rc) { + spin_unlock_bh(&ptp->ptp_lock); + return rc; + } + nsec_now = timecounter_cyc2time(&ptp->tc, cycles_now); + spin_unlock_bh(&ptp->ptp_lock); + + nsec_delta = target_ns - nsec_now; + *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult); + return 0; +} + +static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, + struct ptp_clock_request *rq) +{ + struct hwrm_func_ptp_cfg_input req = {0}; + struct bnxt *bp = ptp->bp; + struct timespec64 ts; + u64 target_ns, delta; + u16 enables; + int rc; + + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + target_ns = timespec64_to_ns(&ts); + + rc = bnxt_get_target_cycles(ptp, target_ns, &delta); + if (rc) + return rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_PTP_CFG, -1, -1); + + enables = FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD | + FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP | + FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE; + req.enables = cpu_to_le16(enables); + req.ptp_pps_event = 0; + req.ptp_freq_adj_dll_source = 0; + req.ptp_freq_adj_dll_phase = 0; + req.ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC); + req.ptp_freq_adj_ext_up = 0; + req.ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta); + + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + +static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq, int on) { - return -EOPNOTSUPP; + struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, + ptp_info); + struct bnxt *bp = ptp->bp; + u8 pin_id; + int 
rc; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + /* Configure an External PPS IN */ + pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, + rq->extts.index); + if (!on) + break; + rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN); + if (rc) + return rc; + rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_EXTERNAL); + if (!rc) + ptp->pps_info.pins[pin_id].event = BNXT_PPS_EVENT_EXTERNAL; + return rc; + case PTP_CLK_REQ_PEROUT: + /* Configure a Periodic PPS OUT */ + pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, + rq->perout.index); + if (!on) + break; + + rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_OUT); + if (!rc) + rc = bnxt_ptp_perout_cfg(ptp, rq); + + return rc; + case PTP_CLK_REQ_PPS: + /* Configure PHC PPS IN */ + rc = bnxt_ptp_cfg_pin(bp, 0, BNXT_PPS_PIN_PPS_IN); + if (rc) + return rc; + rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_INTERNAL); + if (!rc) + ptp->pps_info.pins[0].event = BNXT_PPS_EVENT_INTERNAL; + return rc; + default: + netdev_err(ptp->bp->dev, "Unrecognized PIN function\n"); + return -EOPNOTSUPP; + } + + return bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_NONE); } static int bnxt_hwrm_ptp_cfg(struct bnxt *bp) @@ -309,8 +514,10 @@ static void bnxt_unmap_ptp_regs(struct bnxt *bp) static u64 bnxt_cc_read(const struct cyclecounter *cc) { struct bnxt_ptp_cfg *ptp = container_of(cc, struct bnxt_ptp_cfg, cc); + u64 ns = 0; - return bnxt_refclk_read(ptp->bp, NULL); + bnxt_refclk_read(ptp->bp, NULL, &ns); + return ns; } static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) @@ -408,6 +615,80 @@ static const struct ptp_clock_info bnxt_ptp_caps = { .enable = bnxt_ptp_enable, }; +static int bnxt_ptp_verify(struct ptp_clock_info *ptp_info, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, + ptp_info); + /* Allow only PPS pin function configuration */ + if (ptp->pps_info.pins[pin].usage <= BNXT_PPS_PIN_PPS_OUT && + func != PTP_PF_PHYSYNC) + return 0; + else + return -EOPNOTSUPP; +} + +/* bp->hwrm_cmd_lock held by the caller */ +static int bnxt_ptp_pps_init(struct bnxt *bp) +{ + struct hwrm_func_ptp_pin_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_func_ptp_pin_qcfg_input req = {0}; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct ptp_clock_info *ptp_info; + struct bnxt_pps *pps_info; + u8 *pin_usg; + u32 i, rc; + + /* Query current/default PIN CFG */ + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_PTP_PIN_QCFG, -1, -1); + + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc || !resp->num_pins) + return -EOPNOTSUPP; + + ptp_info = &ptp->ptp_info; + pps_info = &ptp->pps_info; + pps_info->num_pins = resp->num_pins; + ptp_info->n_pins = pps_info->num_pins; + ptp_info->pin_config = kcalloc(ptp_info->n_pins, + sizeof(*ptp_info->pin_config), + GFP_KERNEL); + if (!ptp_info->pin_config) + return -ENOMEM; + + /* Report the TSIO capability to kernel */ + pin_usg = &resp->pin0_usage; + for (i = 0; i < pps_info->num_pins; i++, pin_usg++) { + snprintf(ptp_info->pin_config[i].name, + sizeof(ptp_info->pin_config[i].name), "bnxt_pps%d", i); + ptp_info->pin_config[i].index = i; + ptp_info->pin_config[i].chan = i; + if (*pin_usg == BNXT_PPS_PIN_PPS_IN) + ptp_info->pin_config[i].func = PTP_PF_EXTTS; + else if (*pin_usg == BNXT_PPS_PIN_PPS_OUT) + ptp_info->pin_config[i].func = PTP_PF_PEROUT; + else + ptp_info->pin_config[i].func = PTP_PF_NONE; + + pps_info->pins[i].usage = *pin_usg; + } + + /* Only 1 each of ext_ts and per_out pins is available in HW 
*/ + ptp_info->n_ext_ts = 1; + ptp_info->n_per_out = 1; + ptp_info->pps = 1; + ptp_info->verify = bnxt_ptp_verify; + + return 0; +} + +static bool bnxt_pps_config_ok(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config; +} + int bnxt_ptp_init(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; @@ -420,6 +701,15 @@ int bnxt_ptp_init(struct bnxt *bp) if (rc) return rc; + if (ptp->ptp_clock && bnxt_pps_config_ok(bp)) + return 0; + + if (ptp->ptp_clock) { + ptp_clock_unregister(ptp->ptp_clock); + ptp->ptp_clock = NULL; + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; + } atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); @@ -433,6 +723,10 @@ int bnxt_ptp_init(struct bnxt *bp) timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); ptp->ptp_info = bnxt_ptp_caps; + if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { + if (bnxt_ptp_pps_init(bp)) + netdev_err(bp->dev, "1pps not initialized, continuing without 1pps support\n"); + } ptp->ptp_clock = ptp_clock_register(&ptp->ptp_info, &bp->pdev->dev); if (IS_ERR(ptp->ptp_clock)) { int err = PTR_ERR(ptp->ptp_clock); @@ -443,7 +737,7 @@ int bnxt_ptp_init(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_CHIP_P5) { spin_lock_bh(&ptp->ptp_lock); - ptp->current_time = bnxt_refclk_read(bp, NULL); + bnxt_refclk_read(bp, NULL, &ptp->current_time); WRITE_ONCE(ptp->old_time, ptp->current_time); spin_unlock_bh(&ptp->ptp_lock); ptp_schedule_worker(ptp->ptp_clock, 0); @@ -462,6 +756,9 @@ void bnxt_ptp_clear(struct bnxt *bp) ptp_clock_unregister(ptp->ptp_clock); ptp->ptp_clock = NULL; + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; + if (ptp->tx_skb) { dev_kfree_skb_any(ptp->tx_skb); ptp->tx_skb = NULL; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 254ba7bc0f99..cc3cdbaab6cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -21,11 +21,62 @@ #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT) +struct pps_pin { + u8 event; + u8 usage; + u8 state; +}; + +#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS)) + +#define EVENT_DATA2_PPS_EVENT_TYPE(data2) \ + ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE) + +#define EVENT_DATA2_PPS_PIN_NUM(data2) \ + (((data2) & \ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_MASK) >>\ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_SFT) + +#define BNXT_DATA2_UPPER_MSK \ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_MASK + +#define BNXT_DATA2_UPPER_SFT \ + (32 - \ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_SFT) + +#define BNXT_DATA1_LOWER_MSK \ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_MASK + +#define BNXT_DATA1_LOWER_SFT \ + ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_SFT + +#define EVENT_PPS_TS(data2, data1) \ + (((u64)((data2) & BNXT_DATA2_UPPER_MSK) << BNXT_DATA2_UPPER_SFT) |\ + (((data1) & BNXT_DATA1_LOWER_MSK) >> BNXT_DATA1_LOWER_SFT)) + +#define BNXT_PPS_PIN_DISABLE 0 +#define BNXT_PPS_PIN_ENABLE 1 +#define BNXT_PPS_PIN_NONE 0 +#define BNXT_PPS_PIN_PPS_IN 1 +#define BNXT_PPS_PIN_PPS_OUT 2 +#define BNXT_PPS_PIN_SYNC_IN 3 +#define BNXT_PPS_PIN_SYNC_OUT 4 + +#define BNXT_PPS_EVENT_INTERNAL 1 +#define BNXT_PPS_EVENT_EXTERNAL 2 + +struct bnxt_pps { + u8 num_pins; 
+#define BNXT_MAX_TSIO_PINS 4 + struct pps_pin pins[BNXT_MAX_TSIO_PINS]; +}; + struct bnxt_ptp_cfg { struct ptp_clock_info ptp_info; struct ptp_clock *ptp_clock; struct cyclecounter cc; struct timecounter tc; + struct bnxt_pps pps_info; /* serialize timecounter access */ spinlock_t ptp_lock; struct sk_buff *tx_skb; @@ -75,6 +126,8 @@ do { \ #endif int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id); +void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); +void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index db74241935ab..8507198992df 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3659,7 +3659,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_tx_timeout = bcmgenet_timeout, .ndo_set_rx_mode = bcmgenet_set_rx_mode, .ndo_set_mac_address = bcmgenet_set_mac_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = bcmgenet_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = bcmgenet_poll_controller, @@ -3972,8 +3972,6 @@ static int bcmgenet_probe(struct platform_device *pdev) */ dev->needed_headroom += 64; - netdev_boot_setup_check(dev); - priv->dev = dev; priv->pdev = pdev; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 5b4568c2ad1c..f38f40eb966e 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2136,7 +2136,7 @@ static const struct net_device_ops sbmac_netdev_ops = { .ndo_start_xmit = sbmac_start_tx, .ndo_set_rx_mode = sbmac_set_rx_mode, .ndo_tx_timeout = sbmac_tx_timeout, - .ndo_do_ioctl = sbmac_mii_ioctl, + .ndo_eth_ioctl = sbmac_mii_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index b0e49643f483..6f82eeaa4b9f 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14290,7 +14290,7 @@ static const struct net_device_ops tg3_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = tg3_set_rx_mode, .ndo_set_mac_address = tg3_set_mac_addr, - .ndo_do_ioctl = tg3_ioctl, + .ndo_eth_ioctl = tg3_ioctl, .ndo_tx_timeout = tg3_tx_timeout, .ndo_change_mtu = tg3_change_mtu, .ndo_fix_features = tg3_fix_features, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7d2fe13a52f8..181ebc235925 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3664,7 +3664,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_start_xmit = macb_start_xmit, .ndo_set_rx_mode = macb_set_rx_mode, .ndo_get_stats = macb_get_stats, - .ndo_do_ioctl = macb_ioctl, + .ndo_eth_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = macb_change_mtu, .ndo_set_mac_address = eth_mac_addr, @@ -4323,7 +4323,7 @@ static const struct net_device_ops at91ether_netdev_ops = { .ndo_get_stats = macb_get_stats, .ndo_set_rx_mode = macb_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = macb_ioctl, + .ndo_eth_ioctl = 
macb_ioctl, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = at91ether_poll_controller, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 591229b96257..af116ef83bad 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -3223,7 +3223,7 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_do_ioctl = liquidio_ioctl, + .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, .ndo_set_vf_mac = liquidio_set_vf_mac, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index ffddb3126a32..c6fe0f2a4d0e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -1889,7 +1889,7 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_do_ioctl = liquidio_ioctl, + .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, }; diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 48ff6fb0eed9..30463a6d1f8c 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1373,7 +1373,7 @@ static const struct net_device_ops octeon_mgmt_ops = { .ndo_start_xmit = octeon_mgmt_xmit, .ndo_set_rx_mode = octeon_mgmt_set_rx_filtering, .ndo_set_mac_address = octeon_mgmt_set_mac_address, - .ndo_do_ioctl = octeon_mgmt_ioctl, + .ndo_eth_ioctl = octeon_mgmt_ioctl, .ndo_change_mtu = octeon_mgmt_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = octeon_mgmt_poll_controller, diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 9361f964bb9b..691e1475d55e 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ 
-1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "Unable to get usable DMA configuration\n"); goto err_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); - goto err_release_regions; - } - /* MAP PF's configuration registers */ nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); if (!nic->reg_base) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index e2b290135fd9..d1667b759522 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2096,7 +2096,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, .ndo_bpf = nicvf_xdp, - .ndo_do_ioctl = nicvf_ioctl, + .ndo_eth_ioctl = nicvf_ioctl, .ndo_set_rx_mode = nicvf_set_rx_mode, }; @@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "Unable to get usable DMA configuration\n"); goto err_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); - goto err_release_regions; - } - qcount = netif_get_num_default_rss_queues(); /* Restrict multiqset support only for host bound VFs */ diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 512da98019c6..e7575d41f4f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -924,7 +924,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = t1_set_rxmode, - .ndo_do_ioctl = t1_ioctl, + .ndo_eth_ioctl = t1_ioctl, .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, .ndo_fix_features = t1_fix_features, diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 57f210c53afc..72af9d2a00ae 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2135,13 +2135,18 @@ static int in_range(int val, int lo, int hi) return val < 0 || (val <= hi && val >= lo); } -static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) +static int cxgb_siocdevprivate(struct net_device *dev, + struct ifreq *ifreq, + void __user *useraddr, + int cmd) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; - u32 cmd; int ret; + if (cmd != SIOCCHIOCTL) + return -EOPNOTSUPP; + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; @@ -2546,8 +2551,6 @@ static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd) fallthrough; case SIOCGMIIPHY: return mdio_mii_ioctl(&pi->phy.mdio, data, cmd); - case SIOCCHIOCTL: - return cxgb_extension_ioctl(dev, req->ifr_data); default: return -EOPNOTSUPP; } @@ -3181,7 +3184,8 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = 
cxgb_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = cxgb_set_rxmode, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, + .ndo_siocdevprivate = cxgb_siocdevprivate, .ndo_change_mtu = cxgb_change_mtu, .ndo_set_mac_address = cxgb_set_mac_addr, .ndo_fix_features = cxgb_fix_features, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 6260b3bebd2b..786ceae34488 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, @@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index dbf9a0e6601d..aa8573202c37 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3872,7 +3872,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_set_mac_address = cxgb_set_mac_addr, .ndo_set_features = cxgb_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, .ndo_change_mtu = cxgb_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cxgb_netpoll, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2820a0bb971b..2842628ad2c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2837,7 +2837,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { .ndo_set_rx_mode = cxgb4vf_set_rxmode, .ndo_set_mac_address = cxgb4vf_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb4vf_do_ioctl, + .ndo_eth_ioctl = cxgb4vf_do_ioctl, .ndo_change_mtu = cxgb4vf_change_mtu, .ndo_fix_features = cxgb4vf_fix_features, .ndo_set_features = cxgb4vf_set_features, diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index d8af9e64dd1e..dac1764ba740 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_CIRRUS bool "Cirrus devices" default y - depends on ISA || EISA || ARM || MAC + depends on ISA || EISA || ARM || MAC || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS if NET_VENDOR_CIRRUS config CS89x0 - tristate "CS89x0 support" - depends on ISA || EISA || ARM + tristate + +config CS89x0_ISA + tristate "CS89x0 ISA driver support" + depends on HAS_IOPORT_MAP + depends on ISA depends on !PPC32 + depends on CS89x0_PLATFORM=n + select NETDEV_LEGACY_INIT + select CS89x0 help Support for CS89x0 chipset based Ethernet cards. 
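A pattern repeated across the drivers in this section: the catch-all ndo_do_ioctl is split so that standard MII and timestamping commands (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG, SIOCSHWTSTAMP) arrive via ndo_eth_ioctl, while the SIOCDEVPRIVATE range gets its own ndo_siocdevprivate callback, as the cxgb3 hunk above does for SIOCCHIOCTL. A condensed sketch of the resulting shape (the foo_* names are hypothetical):

#include <linux/netdevice.h>
#include <linux/phy.h>

static int foo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	/* Only the standard MII/timestamping commands land here now. */
	return phy_mii_ioctl(dev->phydev, ifr, cmd);
}

static int foo_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			      void __user *data, int cmd)
{
	/* Driver-private commands: SIOCDEVPRIVATE..SIOCDEVPRIVATE + 15. */
	if (cmd != SIOCDEVPRIVATE)
		return -EOPNOTSUPP;
	/* ... copy_from_user()/copy_to_user() against 'data' ... */
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_eth_ioctl		= foo_eth_ioctl,
	.ndo_siocdevprivate	= foo_siocdevprivate,
	/* remaining callbacks elided */
};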
If you have a network (Ethernet) card of this type, say Y and read the file @@ -30,15 +37,15 @@ config CS89x0 will be called cs89x0. config CS89x0_PLATFORM - bool "CS89x0 platform driver support" if HAS_IOPORT_MAP - default !HAS_IOPORT_MAP - depends on CS89x0 + tristate "CS89x0 platform driver support" + depends on ARM || COMPILE_TEST + select CS89x0 help - Say Y to compile the cs89x0 driver as a platform driver. This - makes this driver suitable for use on certain evaluation boards - such as the iMX21ADS. + Say Y to compile the cs89x0 platform driver. This makes this driver + suitable for use on certain evaluation boards such as the iMX21ADS. - If you are unsure, say N. + To compile this driver as a module, choose M here. The module + will be called cs89x0. config EP93XX_ETH tristate "EP93xx Ethernet support" diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index 33ace3307059..d0c4c8b7a15a 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -104,7 +104,7 @@ static char version[] __initdata = * them to system IRQ numbers. This mapping is card specific and is set to * the configuration of the Cirrus Eval board for this chip. */ -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) static unsigned int netcard_portlist[] __used __initdata = { 0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240, 0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0 @@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq) int i; if (chip_type == CS8900) { -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* Search the mapping table for the corresponding IRQ pin. */ for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++) if (cs8900_irq_map[i] == irq) @@ -859,7 +859,7 @@ net_open(struct net_device *dev) goto bad_out; } } else { -#if !defined(CONFIG_CS89x0_PLATFORM) +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (((1 << dev->irq) & lp->irq_map) == 0) { pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", dev->name, dev->irq, lp->irq_map); @@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) dev->irq = i; } else { i = lp->isa_config & INT_NO_MASK; -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (lp->chip_type == CS8900) { /* Translate the IRQ using the IRQ mapping table. */ if (i >= ARRAY_SIZE(cs8900_irq_map)) @@ -1576,7 +1576,7 @@ out1: return retval; } -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* * This function converts the I/O port address used by the cs89x0_probe() and * init_module() functions to the I/O memory address used by the @@ -1682,11 +1682,7 @@ out: pr_warn("no cs8900 or cs8920 detected. 
Be sure to disable PnP with SETUP\n"); return ERR_PTR(err); } -#endif -#endif - -#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM) - +#else static struct net_device *dev_cs89x0; /* Support the 'debug' module parm even if we're compiled for non-debug to @@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL"); * (hw or software util) */ -int __init init_module(void) +static int __init cs89x0_isa_init_module(void) { - struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); + struct net_device *dev; struct net_local *lp; int ret = 0; @@ -1768,6 +1764,7 @@ int __init init_module(void) #else debug = 0; #endif + dev = alloc_etherdev(sizeof(struct net_local)); if (!dev) return -ENOMEM; @@ -1826,9 +1823,9 @@ out: free_netdev(dev); return ret; } +module_init(cs89x0_isa_init_module); -void __exit -cleanup_module(void) +static void __exit cs89x0_isa_cleanup_module(void) { struct net_local *lp = netdev_priv(dev_cs89x0); @@ -1838,9 +1835,11 @@ cleanup_module(void) release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT); free_netdev(dev_cs89x0); } -#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */ +module_exit(cs89x0_isa_cleanup_module); +#endif /* MODULE */ +#endif /* CONFIG_CS89x0_ISA */ -#ifdef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_PLATFORM) static int __init cs89x0_platform_probe(struct platform_device *pdev) { struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 9f5e5ec69991..072fac5f5d24 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -733,7 +733,7 @@ static const struct net_device_ops ep93xx_netdev_ops = { .ndo_open = ep93xx_open, .ndo_stop = ep93xx_close, .ndo_start_xmit = ep93xx_xmit, - .ndo_do_ioctl = ep93xx_ioctl, + .ndo_eth_ioctl = ep93xx_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 2a8bf53c2f75..e842de6f6635 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1372,7 +1372,7 @@ static const struct net_device_ops dm9000_netdev_ops = { .ndo_start_xmit = dm9000_start_xmit, .ndo_tx_timeout = dm9000_timeout, .ndo_set_rx_mode = dm9000_hash_table, - .ndo_do_ioctl = dm9000_ioctl, + .ndo_eth_ioctl = dm9000_ioctl, .ndo_set_features = dm9000_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index b125d7faefdf..36ab4cbf2ad0 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -443,6 +443,7 @@ ========================================================================= */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> @@ -902,7 +903,8 @@ static int de4x5_close(struct net_device *dev); static struct net_device_stats *de4x5_get_stats(struct net_device *dev); static void de4x5_local_stats(struct net_device *dev, char *buf, int pkt_len); static void set_multicast_list(struct net_device *dev); -static int de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); /* ** Private functions @@ -1084,7 +1086,7 @@ static const struct net_device_ops de4x5_netdev_ops = { .ndo_start_xmit = de4x5_queue_pkt, 
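The cs89x0 conversion just above trades bare #ifdef guards for IS_ENABLED(), which matters once CS89x0_ISA and CS89x0_PLATFORM become tristate: IS_ENABLED(CONFIG_FOO) from <linux/kconfig.h> is true for both =y and =m builds, whereas #ifdef CONFIG_FOO only sees =y (a modular build defines CONFIG_FOO_MODULE instead). In short:

#if IS_ENABLED(CONFIG_CS89x0_ISA)	/* compiled in for both =y and =m */
static unsigned int netcard_portlist[] __used __initdata = { /* ... */ };
#endif

#ifdef CONFIG_CS89x0_ISA		/* =y only; a module build would
					 * silently lose the ISA paths */
#endif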
.ndo_get_stats = de4x5_get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_do_ioctl = de4x5_ioctl, + .ndo_siocdevprivate = de4x5_siocdevprivate, .ndo_set_mac_address= eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -5357,7 +5359,7 @@ de4x5_dbg_rx(struct sk_buff *skb, int len) ** this function is only used for my testing. */ static int -de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct de4x5_private *lp = netdev_priv(dev); struct de4x5_ioctl *ioc = (struct de4x5_ioctl *) &rq->ifr_ifru; @@ -5371,6 +5373,9 @@ de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } tmp; u_long flags = 0; + if (cmd != SIOCDEVPRIVATE || in_compat_syscall()) + return -EOPNOTSUPP; + switch(ioc->cmd) { case DE4X5_GET_HWADDR: /* Get the hardware address */ ioc->len = ETH_ALEN; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index c1dcd6ca1457..fcedd733bacb 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1271,7 +1271,7 @@ static const struct net_device_ops tulip_netdev_ops = { .ndo_tx_timeout = tulip_tx_timeout, .ndo_stop = tulip_close, .ndo_get_stats = tulip_get_stats, - .ndo_do_ioctl = private_ioctl, + .ndo_eth_ioctl = private_ioctl, .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 1876f15dd827..85b99099c6b9 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -341,7 +341,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 734acb834c98..202ecb132053 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -95,7 +95,7 @@ static const struct net_device_ops netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_rx_mode = set_multicast, - .ndo_do_ioctl = rio_ioctl, + .ndo_eth_ioctl = rio_ioctl, .ndo_tx_timeout = rio_tx_timeout, }; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index ee0ca712dd1c..c36d186dffed 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -479,7 +479,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = tx_timeout, .ndo_change_mtu = change_mtu, .ndo_set_mac_address = sundance_set_mac_addr, diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 48c6eb142dcc..6c51cf991dad 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -742,7 +742,7 @@ static const struct net_device_ops dnet_netdev_ops = { .ndo_stop = dnet_close, .ndo_get_stats = dnet_get_stats, .ndo_start_xmit = dnet_start_xmit, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = 
eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index e1b43b07755b..ed1ed48e7483 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1009,7 +1009,7 @@ static const struct ethtool_ops ethoc_ethtool_ops = { static const struct net_device_ops ethoc_netdev_ops = { .ndo_open = ethoc_open, .ndo_stop = ethoc_stop, - .ndo_do_ioctl = ethoc_ioctl, + .ndo_eth_ioctl = ethoc_ioctl, .ndo_set_mac_address = ethoc_set_mac_address, .ndo_set_rx_mode = ethoc_set_multicast_list, .ndo_change_mtu = ethoc_change_mtu, diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 11dbbfd38770..ff76e401a014 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1616,7 +1616,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = { .ndo_start_xmit = ftgmac100_hard_start_xmit, .ndo_set_mac_address = ftgmac100_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = ftgmac100_tx_timeout, .ndo_set_rx_mode = ftgmac100_set_rx_mode, .ndo_set_features = ftgmac100_set_features, diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 5a1a8f2ea63c..8a341e2d5833 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1043,7 +1043,7 @@ static const struct net_device_ops ftmac100_netdev_ops = { .ndo_start_xmit = ftmac100_hard_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ftmac100_do_ioctl, + .ndo_eth_ioctl = ftmac100_do_ioctl, }; /****************************************************************************** diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 0f141c14d72d..25c91b3c5fd3 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -463,7 +463,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = mii_ioctl, + .ndo_eth_ioctl = mii_ioctl, .ndo_tx_timeout = fealnx_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index e6826561cf11..685d2d8a3b36 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -3157,7 +3157,7 @@ static const struct net_device_ops dpaa_ops = { .ndo_set_mac_address = dpaa_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = dpaa_set_rx_mode, - .ndo_do_ioctl = dpaa_ioctl, + .ndo_eth_ioctl = dpaa_ioctl, .ndo_setup_tc = dpaa_setup_tc, .ndo_change_mtu = dpaa_change_mtu, .ndo_bpf = dpaa_xdp, diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index c2ef74052ef8..3d9842af7f10 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o -fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o 
+fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 833696245565..8e09f65ea295 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id) struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv, struct dpaa2_fapr *fapr) { - struct dpaa2_faf_error_bit { + static const struct dpaa2_faf_error_bit { int position; enum devlink_trap_generic_id trap_id; } faf_bits[] = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 973352393bd4..7065c71ed7b8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2594,7 +2594,7 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_get_stats64 = dpaa2_eth_get_stats, .ndo_set_rx_mode = dpaa2_eth_set_rx_mode, .ndo_set_features = dpaa2_eth_set_features, - .ndo_do_ioctl = dpaa2_eth_ioctl, + .ndo_eth_ioctl = dpaa2_eth_ioctl, .ndo_change_mtu = dpaa2_eth_change_mtu, .ndo_bpf = dpaa2_eth_xdp, .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, @@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) int err; dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent); - dpmac_dev = fsl_mc_get_endpoint(dpni_dev); + dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0); if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) return PTR_ERR(dpmac_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c index 70e04321c420..720c9230cab5 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -15,18 +15,18 @@ static struct { enum dpsw_counter id; char name[ETH_GSTRING_LEN]; } dpaa2_switch_ethtool_counters[] = { - {DPSW_CNT_ING_FRAME, "rx frames"}, - {DPSW_CNT_ING_BYTE, "rx bytes"}, - {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"}, - {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"}, - {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"}, - {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"}, - {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"}, - {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"}, - {DPSW_CNT_EGR_FRAME, "tx frames"}, - {DPSW_CNT_EGR_BYTE, "tx bytes"}, - {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"}, - {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"}, + {DPSW_CNT_ING_FRAME, "[hw] rx frames"}, + {DPSW_CNT_ING_BYTE, "[hw] rx bytes"}, + {DPSW_CNT_ING_FLTR_FRAME, "[hw] rx filtered frames"}, + {DPSW_CNT_ING_FRAME_DISCARD, "[hw] rx discarded frames"}, + {DPSW_CNT_ING_BCAST_FRAME, "[hw] rx bcast frames"}, + {DPSW_CNT_ING_BCAST_BYTES, "[hw] rx bcast bytes"}, + {DPSW_CNT_ING_MCAST_FRAME, "[hw] rx mcast frames"}, + {DPSW_CNT_ING_MCAST_BYTE, "[hw] rx mcast bytes"}, + {DPSW_CNT_EGR_FRAME, "[hw] tx frames"}, + {DPSW_CNT_EGR_BYTE, "[hw] tx bytes"}, + {DPSW_CNT_EGR_FRAME_DISCARD, "[hw] tx discarded frames"}, + {DPSW_CNT_ING_NO_BUFF_DISCARD, "[hw] rx nobuffer discards"}, }; #define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters) @@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev, struct 
dpsw_link_state state = {0}; int err = 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_get(port_priv->mac->phylink, + link_ksettings); + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx, @@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, bool if_running; int err = 0, ret; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_set(port_priv->mac->phylink, + link_ksettings); + /* Interface needs to be down to change link settings */ if_running = netif_running(netdev); if (if_running) { @@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, return err; } -static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) +static int +dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS; + switch (sset) { case ETH_SS_STATS: - return DPAA2_SWITCH_NUM_COUNTERS; + if (port_priv->mac) + num_ss_stats += dpaa2_mac_get_sset_count(); + return num_ss_stats; default: return -EOPNOTSUPP; } @@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) - memcpy(data + i * ETH_GSTRING_LEN, - dpaa2_switch_ethtool_counters[i].name, + for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) { + memcpy(p, dpaa2_switch_ethtool_counters[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + if (port_priv->mac) + dpaa2_mac_get_strings(p); break; } } @@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n", dpaa2_switch_ethtool_counters[i].name, err); } + + if (port_priv->mac) + dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i); } const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index f9451ec5f2cb..d6eefbbf163f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -111,11 +111,11 @@ static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls, return 0; } -int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, struct dpaa2_switch_acl_entry *entry) { struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg; - struct ethsw_core *ethsw = acl_tbl->ethsw; + struct ethsw_core *ethsw = filter_block->ethsw; struct dpsw_acl_key *acl_key = &entry->key; struct device *dev = ethsw->dev; u8 *cmd_buff; @@ -136,7 +136,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, } err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, - acl_tbl->id, acl_entry_cfg); + filter_block->acl_id, acl_entry_cfg); dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), DMA_TO_DEVICE); @@ -150,12 +150,13 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, return 0; } -static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl, - struct 
dpaa2_switch_acl_entry *entry) +static int +dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, + struct dpaa2_switch_acl_entry *entry) { struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg; struct dpsw_acl_key *acl_key = &entry->key; - struct ethsw_core *ethsw = acl_tbl->ethsw; + struct ethsw_core *ethsw = block->ethsw; struct device *dev = ethsw->dev; u8 *cmd_buff; int err; @@ -175,7 +176,7 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl, } err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, - acl_tbl->id, acl_entry_cfg); + block->acl_id, acl_entry_cfg); dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), DMA_TO_DEVICE); @@ -190,19 +191,19 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl, } static int -dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_filter_block *block, struct dpaa2_switch_acl_entry *entry) { struct dpaa2_switch_acl_entry *tmp; struct list_head *pos, *n; int index = 0; - if (list_empty(&acl_tbl->entries)) { - list_add(&entry->list, &acl_tbl->entries); + if (list_empty(&block->acl_entries)) { + list_add(&entry->list, &block->acl_entries); return index; } - list_for_each_safe(pos, n, &acl_tbl->entries) { + list_for_each_safe(pos, n, &block->acl_entries) { tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list); if (entry->prio < tmp->prio) break; @@ -213,13 +214,13 @@ dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl, } static struct dpaa2_switch_acl_entry* -dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_filter_block *block, int index) { struct dpaa2_switch_acl_entry *tmp; int i = 0; - list_for_each_entry(tmp, &acl_tbl->entries, list) { + list_for_each_entry(tmp, &block->acl_entries, list) { if (i == index) return tmp; ++i; @@ -229,37 +230,38 @@ dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl, } static int -dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_filter_block *block, struct dpaa2_switch_acl_entry *entry, int precedence) { int err; - err = dpaa2_switch_acl_entry_remove(acl_tbl, entry); + err = dpaa2_switch_acl_entry_remove(block, entry); if (err) return err; entry->cfg.precedence = precedence; - return dpaa2_switch_acl_entry_add(acl_tbl, entry); + return dpaa2_switch_acl_entry_add(block, entry); } -static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl, - struct dpaa2_switch_acl_entry *entry) +static int +dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_filter_block *block, + struct dpaa2_switch_acl_entry *entry) { struct dpaa2_switch_acl_entry *tmp; int index, i, precedence, err; /* Add the new ACL entry to the linked list and get its index */ - index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry); + index = dpaa2_switch_acl_entry_add_to_list(block, entry); /* Move up in priority the ACL entries to make space * for the new filter. 
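The rename above (acl_tbl to filter_block) keeps the same precedence bookkeeping: entries live in a list sorted by tc priority, and every insert re-derives the hardware precedence of the entries in front of the new one. A worked example, assuming DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES == 16:

/*
 * before:  list = { A(prio 10), B(prio 30) }  -> precedence { A:14, B:15 }
 * add C(prio 20), which sorts to index 1; precedence starts at
 * 16 - 2 - 1 = 13, and only A (the list positions before C) is re-added:
 * after:   list = { A, C, B }                 -> precedence { A:13, C:14, B:15 }
 *
 * Invariant: n rules always occupy the values [MAX - n, MAX - 1],
 * in list (tc priority) order.
 */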
*/ - precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1; + precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - block->num_acl_rules - 1; for (i = 0; i < index; i++) { - tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); + tmp = dpaa2_switch_acl_entry_get_by_index(block, i); - err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + err = dpaa2_switch_acl_entry_set_precedence(block, tmp, precedence); if (err) return err; @@ -269,19 +271,19 @@ static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl, /* Add the new entry to hardware */ entry->cfg.precedence = precedence; - err = dpaa2_switch_acl_entry_add(acl_tbl, entry); - acl_tbl->num_rules++; + err = dpaa2_switch_acl_entry_add(block, entry); + block->num_acl_rules++; return err; } static struct dpaa2_switch_acl_entry * -dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_filter_block *block, unsigned long cookie) { struct dpaa2_switch_acl_entry *tmp, *n; - list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + list_for_each_entry_safe(tmp, n, &block->acl_entries, list) { if (tmp->cookie == cookie) return tmp; } @@ -289,13 +291,13 @@ dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl, } static int -dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_filter_block *block, struct dpaa2_switch_acl_entry *entry) { struct dpaa2_switch_acl_entry *tmp, *n; int index = 0; - list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + list_for_each_entry_safe(tmp, n, &block->acl_entries, list) { if (tmp->cookie == entry->cookie) return index; index++; @@ -303,21 +305,34 @@ dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl, return -ENOENT; } +static struct dpaa2_switch_mirror_entry * +dpaa2_switch_mirror_find_entry_by_cookie(struct dpaa2_switch_filter_block *block, + unsigned long cookie) +{ + struct dpaa2_switch_mirror_entry *tmp, *n; + + list_for_each_entry_safe(tmp, n, &block->mirror_entries, list) { + if (tmp->cookie == cookie) + return tmp; + } + return NULL; +} + static int -dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_filter_block *block, struct dpaa2_switch_acl_entry *entry) { struct dpaa2_switch_acl_entry *tmp; int index, i, precedence, err; - index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry); + index = dpaa2_switch_acl_entry_get_index(block, entry); /* Remove from hardware the ACL entry */ - err = dpaa2_switch_acl_entry_remove(acl_tbl, entry); + err = dpaa2_switch_acl_entry_remove(block, entry); if (err) return err; - acl_tbl->num_rules--; + block->num_acl_rules--; /* Remove it from the list also */ list_del(&entry->list); @@ -325,8 +340,8 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl, /* Move down in priority the entries over the deleted one */ precedence = entry->cfg.precedence; for (i = index - 1; i >= 0; i--) { - tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); - err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + tmp = dpaa2_switch_acl_entry_get_by_index(block, i); + err = dpaa2_switch_acl_entry_set_precedence(block, tmp, precedence); if (err) return err; @@ -339,10 +354,10 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl, return 0; } -static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw, - struct 
flow_action_entry *cls_act, - struct dpsw_acl_result *dpsw_act, - struct netlink_ext_ack *extack) +static int dpaa2_switch_tc_parse_action_acl(struct ethsw_core *ethsw, + struct flow_action_entry *cls_act, + struct dpsw_acl_result *dpsw_act, + struct netlink_ext_ack *extack) { int err = 0; @@ -374,22 +389,110 @@ out: return err; } -int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, +static int +dpaa2_switch_block_add_mirror(struct dpaa2_switch_filter_block *block, + struct dpaa2_switch_mirror_entry *entry, + u16 to, struct netlink_ext_ack *extack) +{ + unsigned long block_ports = block->ports; + struct ethsw_core *ethsw = block->ethsw; + struct ethsw_port_priv *port_priv; + unsigned long ports_added = 0; + u16 vlan = entry->cfg.vlan_id; + bool mirror_port_enabled; + int err, port; + + /* Setup the mirroring port */ + mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs); + if (!mirror_port_enabled) { + err = dpsw_set_reflection_if(ethsw->mc_io, 0, + ethsw->dpsw_handle, to); + if (err) + return err; + ethsw->mirror_port = to; + } + + /* Setup the same egress mirroring configuration on all the switch + * ports that share the same filter block. + */ + for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs) { + port_priv = ethsw->ports[port]; + + /* We cannot add a per VLAN mirroring rule if the VLAN in + * question is not installed on the switch port. + */ + if (entry->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN && + !(port_priv->vlans[vlan] & ETHSW_VLAN_MEMBER)) { + NL_SET_ERR_MSG(extack, + "VLAN must be installed on the switch port"); + err = -EINVAL; + goto err_remove_filters; + } + + err = dpsw_if_add_reflection(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port, &entry->cfg); + if (err) + goto err_remove_filters; + + ports_added |= BIT(port); + } + + list_add(&entry->list, &block->mirror_entries); + + return 0; + +err_remove_filters: + for_each_set_bit(port, &ports_added, ethsw->sw_attr.num_ifs) { + dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle, + port, &entry->cfg); + } + + if (!mirror_port_enabled) + ethsw->mirror_port = ethsw->sw_attr.num_ifs; + + return err; +} + +static int +dpaa2_switch_block_remove_mirror(struct dpaa2_switch_filter_block *block, + struct dpaa2_switch_mirror_entry *entry) +{ + struct dpsw_reflection_cfg *cfg = &entry->cfg; + unsigned long block_ports = block->ports; + struct ethsw_core *ethsw = block->ethsw; + int port; + + /* Remove this mirroring configuration from all the ports belonging to + * the filter block. 
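dpaa2_switch_block_add_mirror() above uses a classic partial-failure idiom: record which ports were actually configured in a local bitmap, and unwind only those on error. Distilled into a standalone sketch (apply_to_ports() is a hypothetical helper; the dpsw_if_*_reflection() calls are the ones from the hunk):

static int apply_to_ports(struct ethsw_core *ethsw, unsigned long ports,
			  struct dpsw_reflection_cfg *cfg)
{
	unsigned long done = 0;	/* ports successfully configured so far */
	int port, err;

	for_each_set_bit(port, &ports, ethsw->sw_attr.num_ifs) {
		err = dpsw_if_add_reflection(ethsw->mc_io, 0,
					     ethsw->dpsw_handle, port, cfg);
		if (err)
			goto unwind;
		done |= BIT(port);
	}
	return 0;

unwind:
	for_each_set_bit(port, &done, ethsw->sw_attr.num_ifs)
		dpsw_if_remove_reflection(ethsw->mc_io, 0,
					  ethsw->dpsw_handle, port, cfg);
	return err;
}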
+ */ + for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs) + dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle, + port, cfg); + + /* Also remove it from the list of mirror filters */ + list_del(&entry->list); + kfree(entry); + + /* If this was the last mirror filter, then unset the mirror port */ + if (list_empty(&block->mirror_entries)) + ethsw->mirror_port = ethsw->sw_attr.num_ifs; + + return 0; +} + +static int +dpaa2_switch_cls_flower_replace_acl(struct dpaa2_switch_filter_block *block, struct flow_cls_offload *cls) { struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct netlink_ext_ack *extack = cls->common.extack; - struct ethsw_core *ethsw = acl_tbl->ethsw; struct dpaa2_switch_acl_entry *acl_entry; + struct ethsw_core *ethsw = block->ethsw; struct flow_action_entry *act; int err; - if (!flow_offload_has_one_action(&rule->action)) { - NL_SET_ERR_MSG(extack, "Only singular actions are supported"); - return -EOPNOTSUPP; - } - - if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + if (dpaa2_switch_acl_tbl_is_full(block)) { NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); return -ENOMEM; } @@ -403,15 +506,15 @@ int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, goto free_acl_entry; act = &rule->action.entries[0]; - err = dpaa2_switch_tc_parse_action(ethsw, act, - &acl_entry->cfg.result, extack); + err = dpaa2_switch_tc_parse_action_acl(ethsw, act, + &acl_entry->cfg.result, extack); if (err) goto free_acl_entry; acl_entry->prio = cls->common.prio; acl_entry->cookie = cls->cookie; - err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry); if (err) goto free_acl_entry; @@ -423,33 +526,171 @@ free_acl_entry: return err; } -int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, - struct flow_cls_offload *cls) +static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, + u16 *vlan) { - struct dpaa2_switch_acl_entry *entry; + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + struct netlink_ext_ack *extack = cls->common.extack; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { + NL_SET_ERR_MSG_MOD(extack, + "Mirroring is supported only per VLAN"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); - entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); - if (!entry) - return 0; + if (match.mask->vlan_priority != 0 || + match.mask->vlan_dei != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Only matching on VLAN ID supported"); + return -EOPNOTSUPP; + } - return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); + if (match.mask->vlan_id != 0xFFF) { + NL_SET_ERR_MSG_MOD(extack, + "Masked matching not supported"); + return -EOPNOTSUPP; + } + + *vlan = (u16)match.key->vlan_id; + } + + return 0; } -int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, - struct tc_cls_matchall_offload *cls) +static int +dpaa2_switch_cls_flower_replace_mirror(struct dpaa2_switch_filter_block *block, + struct flow_cls_offload *cls) { struct netlink_ext_ack *extack = cls->common.extack; - struct ethsw_core *ethsw = acl_tbl->ethsw; - struct dpaa2_switch_acl_entry *acl_entry; - struct flow_action_entry *act; + struct dpaa2_switch_mirror_entry *mirror_entry; + struct ethsw_core 
*ethsw = block->ethsw; + struct dpaa2_switch_mirror_entry *tmp; + struct flow_action_entry *cls_act; + struct list_head *pos, *n; + bool mirror_port_enabled; + u16 if_id, vlan; int err; - if (!flow_offload_has_one_action(&cls->rule->action)) { + mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs); + cls_act = &cls->rule->action.entries[0]; + + /* Offload rules only when the destination is a DPAA2 switch port */ + if (!dpaa2_switch_port_dev_check(cls_act->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not a DPAA2 switch port"); + return -EOPNOTSUPP; + } + if_id = dpaa2_switch_get_index(ethsw, cls_act->dev); + + /* We have a single mirror port but can configure egress mirroring on + * all the other switch ports. We need to allow mirroring rules only + * when the destination port is the same. + */ + if (mirror_port_enabled && ethsw->mirror_port != if_id) { + NL_SET_ERR_MSG_MOD(extack, + "Multiple mirror ports not supported"); + return -EBUSY; + } + + /* Parse the key */ + err = dpaa2_switch_flower_parse_mirror_key(cls, &vlan); + if (err) + return err; + + /* Make sure that we don't already have a mirror rule with the same + * configuration. + */ + list_for_each_safe(pos, n, &block->mirror_entries) { + tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list); + + if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN && + tmp->cfg.vlan_id == vlan) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN mirror filter already installed"); + return -EBUSY; + } + } + + mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL); + if (!mirror_entry) + return -ENOMEM; + + mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_VLAN; + mirror_entry->cfg.vlan_id = vlan; + mirror_entry->cookie = cls->cookie; + + return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id, + extack); +} + +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct flow_action_entry *act; + + if (!flow_offload_has_one_action(&rule->action)) { NL_SET_ERR_MSG(extack, "Only singular actions are supported"); return -EOPNOTSUPP; } - if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + act = &rule->action.entries[0]; + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_TRAP: + case FLOW_ACTION_DROP: + return dpaa2_switch_cls_flower_replace_acl(block, cls); + case FLOW_ACTION_MIRRED: + return dpaa2_switch_cls_flower_replace_mirror(block, cls); + default: + NL_SET_ERR_MSG_MOD(extack, "Action not supported"); + return -EOPNOTSUPP; + } +} + +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block, + struct flow_cls_offload *cls) +{ + struct dpaa2_switch_mirror_entry *mirror_entry; + struct dpaa2_switch_acl_entry *acl_entry; + + /* If this filter is a an ACL one, remove it */ + acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block, + cls->cookie); + if (acl_entry) + return dpaa2_switch_acl_tbl_remove_entry(block, acl_entry); + + /* If not, then it has to be a mirror */ + mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block, + cls->cookie); + if (mirror_entry) + return dpaa2_switch_block_remove_mirror(block, + mirror_entry); + + return 0; +} + +static int +dpaa2_switch_cls_matchall_replace_acl(struct dpaa2_switch_filter_block *block, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct ethsw_core *ethsw = block->ethsw; + struct 
dpaa2_switch_acl_entry *acl_entry; + struct flow_action_entry *act; + int err; + + if (dpaa2_switch_acl_tbl_is_full(block)) { NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); return -ENOMEM; } @@ -459,15 +700,15 @@ int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, return -ENOMEM; act = &cls->rule->action.entries[0]; - err = dpaa2_switch_tc_parse_action(ethsw, act, - &acl_entry->cfg.result, extack); + err = dpaa2_switch_tc_parse_action_acl(ethsw, act, + &acl_entry->cfg.result, extack); if (err) goto free_acl_entry; acl_entry->prio = cls->common.prio; acl_entry->cookie = cls->cookie; - err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry); if (err) goto free_acl_entry; @@ -479,14 +720,159 @@ free_acl_entry: return err; } -int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, +static int +dpaa2_switch_cls_matchall_replace_mirror(struct dpaa2_switch_filter_block *block, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct dpaa2_switch_mirror_entry *mirror_entry; + struct ethsw_core *ethsw = block->ethsw; + struct dpaa2_switch_mirror_entry *tmp; + struct flow_action_entry *cls_act; + struct list_head *pos, *n; + bool mirror_port_enabled; + u16 if_id; + + mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs); + cls_act = &cls->rule->action.entries[0]; + + /* Offload rules only when the destination is a DPAA2 switch port */ + if (!dpaa2_switch_port_dev_check(cls_act->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not a DPAA2 switch port"); + return -EOPNOTSUPP; + } + if_id = dpaa2_switch_get_index(ethsw, cls_act->dev); + + /* We have a single mirror port but can configure egress mirroring on + * all the other switch ports. We need to allow mirroring rules only + * when the destination port is the same. + */ + if (mirror_port_enabled && ethsw->mirror_port != if_id) { + NL_SET_ERR_MSG_MOD(extack, + "Multiple mirror ports not supported"); + return -EBUSY; + } + + /* Make sure that we don't already have a mirror rule with the same + * configuration. One matchall rule per block is the maximum. 
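Both the flower and matchall mirror paths enforce the same invariant seen here: the DPSW exposes a single analyzer (reflection) port, and ethsw->mirror_port reuses the out-of-range value sw_attr.num_ifs as an "unset" sentinel. That check, pulled out as a sketch (the helper name is hypothetical):

static bool dpaa2_switch_mirror_dest_ok(struct ethsw_core *ethsw, u16 if_id)
{
	/* num_ifs can never be a real port index, so it doubles as "unset" */
	bool enabled = ethsw->mirror_port != ethsw->sw_attr.num_ifs;

	return !enabled || ethsw->mirror_port == if_id;
}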
+ */ + list_for_each_safe(pos, n, &block->mirror_entries) { + tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list); + + if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_ALL) { + NL_SET_ERR_MSG_MOD(extack, + "Matchall mirror filter already installed"); + return -EBUSY; + } + } + + mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL); + if (!mirror_entry) + return -ENOMEM; + + mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_ALL; + mirror_entry->cookie = cls->cookie; + + return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id, + extack); +} + +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct flow_action_entry *act; + + if (!flow_offload_has_one_action(&cls->rule->action)) { + NL_SET_ERR_MSG(extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + act = &cls->rule->action.entries[0]; + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_TRAP: + case FLOW_ACTION_DROP: + return dpaa2_switch_cls_matchall_replace_acl(block, cls); + case FLOW_ACTION_MIRRED: + return dpaa2_switch_cls_matchall_replace_mirror(block, cls); + default: + NL_SET_ERR_MSG_MOD(extack, "Action not supported"); + return -EOPNOTSUPP; + } +} + +int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block, + struct ethsw_port_priv *port_priv) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_mirror_entry *tmp; + int err; + + list_for_each_entry(tmp, &block->mirror_entries, list) { + err = dpsw_if_add_reflection(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx, &tmp->cfg); + if (err) + goto unwind_add; + } + + return 0; + +unwind_add: + list_for_each_entry(tmp, &block->mirror_entries, list) + dpsw_if_remove_reflection(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx, &tmp->cfg); + + return err; +} + +int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block, + struct ethsw_port_priv *port_priv) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_mirror_entry *tmp; + int err; + + list_for_each_entry(tmp, &block->mirror_entries, list) { + err = dpsw_if_remove_reflection(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx, &tmp->cfg); + if (err) + goto unwind_remove; + } + + return 0; + +unwind_remove: + list_for_each_entry(tmp, &block->mirror_entries, list) + dpsw_if_add_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, &tmp->cfg); + + return err; +} + +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block, struct tc_cls_matchall_offload *cls) { - struct dpaa2_switch_acl_entry *entry; + struct dpaa2_switch_mirror_entry *mirror_entry; + struct dpaa2_switch_acl_entry *acl_entry; + + /* If this filter is a an ACL one, remove it */ + acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block, + cls->cookie); + if (acl_entry) + return dpaa2_switch_acl_tbl_remove_entry(block, + acl_entry); - entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); - if (!entry) - return 0; + /* If not, then it has to be a mirror */ + mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block, + cls->cookie); + if (mirror_entry) + return dpaa2_switch_block_remove_mirror(block, + mirror_entry); - return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); + return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c 
index 68b78642c045..d260993ab2dc 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -41,14 +41,14 @@ static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *e return NULL; } -static struct dpaa2_switch_acl_tbl * -dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw) +static struct dpaa2_switch_filter_block * +dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw) { int i; for (i = 0; i < ethsw->sw_attr.num_ifs; i++) - if (!ethsw->acls[i].in_use) - return &ethsw->acls[i]; + if (!ethsw->filter_blocks[i].in_use) + return &ethsw->filter_blocks[i]; return NULL; } @@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu) return 0; } -static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev) +static int dpaa2_switch_port_link_state_update(struct net_device *netdev) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct dpsw_link_state state; int err; + /* When we manage the MAC/PHY using phylink there is no need + * to manually update the netif_carrier. + */ + if (dpaa2_switch_port_is_type_phy(port_priv)) + return 0; + /* Interrupts are received even though no one issued an 'ifconfig up' * on the switch interface. Ignore these link state update interrupts */ @@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; - /* Explicitly set carrier off, otherwise - * netif_carrier_ok() will return true and cause 'ip link show' - * to report the LOWER_UP flag, even though the link - * notification wasn't even received.
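The dpaa2-switch hunks here hand carrier management over to phylink for ports connected to a MAC/PHY: link_state_update() now returns early for them, and open/stop call phylink_start()/phylink_stop() instead of syncing the carrier by hand. Condensed (enable_hw_port()/disable_hw_port() are hypothetical stand-ins for the dpsw_if_enable()/dpsw_if_disable() plumbing):

static int swport_open(struct net_device *netdev)
{
	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
	int err;

	if (!dpaa2_switch_port_is_type_phy(port_priv))
		netif_carrier_off(netdev);	/* wait for the link interrupt */

	err = enable_hw_port(port_priv);
	if (err)
		return err;

	if (dpaa2_switch_port_is_type_phy(port_priv))
		phylink_start(port_priv->mac->phylink);	/* phylink owns carrier */
	return 0;
}

static int swport_stop(struct net_device *netdev)
{
	struct ethsw_port_priv *port_priv = netdev_priv(netdev);

	if (dpaa2_switch_port_is_type_phy(port_priv)) {
		phylink_stop(port_priv->mac->phylink);
	} else {
		netif_tx_stop_all_queues(netdev);
		netif_carrier_off(netdev);
	}
	return disable_hw_port(port_priv);
}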
+ */ + netif_carrier_off(netdev); + } err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, @@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev) return err; } - /* sync carrier state */ - err = dpaa2_switch_port_carrier_state_sync(netdev); - if (err) { - netdev_err(netdev, - "dpaa2_switch_port_carrier_state_sync err %d\n", err); - goto err_carrier_sync; - } - dpaa2_switch_enable_ctrl_if_napi(ethsw); - return 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + phylink_start(port_priv->mac->phylink); -err_carrier_sync: - dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, - port_priv->ethsw_data->dpsw_handle, - port_priv->idx); - return err; + return 0; } static int dpaa2_switch_port_stop(struct net_device *netdev) @@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; + if (dpaa2_switch_port_is_type_phy(port_priv)) { + phylink_stop(port_priv->mac->phylink); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } + err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx); @@ -1127,28 +1131,28 @@ err_exit: } static int -dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_filter_block *filter_block, struct flow_cls_offload *f) { switch (f->command) { case FLOW_CLS_REPLACE: - return dpaa2_switch_cls_flower_replace(acl_tbl, f); + return dpaa2_switch_cls_flower_replace(filter_block, f); case FLOW_CLS_DESTROY: - return dpaa2_switch_cls_flower_destroy(acl_tbl, f); + return dpaa2_switch_cls_flower_destroy(filter_block, f); default: return -EOPNOTSUPP; } } static int -dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl, +dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_filter_block *block, struct tc_cls_matchall_offload *f) { switch (f->command) { case TC_CLSMATCHALL_REPLACE: - return dpaa2_switch_cls_matchall_replace(acl_tbl, f); + return dpaa2_switch_cls_matchall_replace(block, f); case TC_CLSMATCHALL_DESTROY: - return dpaa2_switch_cls_matchall_destroy(acl_tbl, f); + return dpaa2_switch_cls_matchall_destroy(block, f); default: return -EOPNOTSUPP; } @@ -1170,106 +1174,122 @@ static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type, static LIST_HEAD(dpaa2_switch_block_cb_list); -static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv, - struct dpaa2_switch_acl_tbl *acl_tbl) +static int +dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_filter_block *block) { struct ethsw_core *ethsw = port_priv->ethsw_data; struct net_device *netdev = port_priv->netdev; struct dpsw_acl_if_cfg acl_if_cfg; int err; - if (port_priv->acl_tbl) + if (port_priv->filter_block) return -EINVAL; acl_if_cfg.if_id[0] = port_priv->idx; acl_if_cfg.num_ifs = 1; err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, - acl_tbl->id, &acl_if_cfg); + block->acl_id, &acl_if_cfg); if (err) { netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); return err; } - acl_tbl->ports |= BIT(port_priv->idx); - port_priv->acl_tbl = acl_tbl; + block->ports |= BIT(port_priv->idx); + port_priv->filter_block = block; return 0; } static int dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv, - struct dpaa2_switch_acl_tbl *acl_tbl) + struct dpaa2_switch_filter_block *block) { struct ethsw_core *ethsw = port_priv->ethsw_data; struct net_device 
*netdev = port_priv->netdev; struct dpsw_acl_if_cfg acl_if_cfg; int err; - if (port_priv->acl_tbl != acl_tbl) + if (port_priv->filter_block != block) return -EINVAL; acl_if_cfg.if_id[0] = port_priv->idx; acl_if_cfg.num_ifs = 1; err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle, - acl_tbl->id, &acl_if_cfg); + block->acl_id, &acl_if_cfg); if (err) { netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); return err; } - acl_tbl->ports &= ~BIT(port_priv->idx); - port_priv->acl_tbl = NULL; + block->ports &= ~BIT(port_priv->idx); + port_priv->filter_block = NULL; return 0; } static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv, - struct dpaa2_switch_acl_tbl *acl_tbl) + struct dpaa2_switch_filter_block *block) { - struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl; + struct dpaa2_switch_filter_block *old_block = port_priv->filter_block; int err; + /* Offload all the mirror entries found in the block on this new port + * joining it. + */ + err = dpaa2_switch_block_offload_mirror(block, port_priv); + if (err) + return err; + /* If the port is already bound to this ACL table then do nothing. This * can happen when this port is the first one to join a tc block */ - if (port_priv->acl_tbl == acl_tbl) + if (port_priv->filter_block == block) return 0; - err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl); + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_block); if (err) return err; /* Mark the previous ACL table as being unused if this was the last * port that was using it. */ - if (old_acl_tbl->ports == 0) - old_acl_tbl->in_use = false; + if (old_block->ports == 0) + old_block->in_use = false; - return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl); + return dpaa2_switch_port_acl_tbl_bind(port_priv, block); } -static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv, - struct dpaa2_switch_acl_tbl *acl_tbl) +static int +dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_filter_block *block) { struct ethsw_core *ethsw = port_priv->ethsw_data; - struct dpaa2_switch_acl_tbl *new_acl_tbl; + struct dpaa2_switch_filter_block *new_block; int err; + /* Unoffload all the mirror entries found in the block from the + * port leaving it. + */ + err = dpaa2_switch_block_unoffload_mirror(block, port_priv); + if (err) + return err; + /* We are the last port that leaves a block (an ACL table). * We'll continue to use this table. 
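
For context on the bind/unbind paths above: a filter block records its member switch ports in the 64-bit ports mask, so joining sets BIT(idx), leaving clears it, a mask of zero marks the block unused, and "last remaining member" is a single comparison against BIT(idx). A stand-alone C model of that bookkeeping (illustrative only; the names are invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)  (1ULL << (n))

struct block_model {
        uint64_t ports;   /* one bit per member switch port */
        bool in_use;
};

static void bind_port(struct block_model *b, int idx)
{
        b->ports |= BIT(idx);
        b->in_use = true;
}

static void unbind_port(struct block_model *b, int idx)
{
        b->ports &= ~BIT(idx);
        if (!b->ports)
                b->in_use = false;   /* last port gone: block is unused */
}

static bool last_member(const struct block_model *b, int idx)
{
        return b->ports == BIT(idx);
}

int main(void)
{
        struct block_model b = { 0 };

        bind_port(&b, 2);
        bind_port(&b, 5);
        unbind_port(&b, 5);
        printf("port 2 is last member: %d\n", last_member(&b, 2)); /* 1 */
        return 0;
}
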
*/ - if (acl_tbl->ports == BIT(port_priv->idx)) + if (block->ports == BIT(port_priv->idx)) return 0; - err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl); + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, block); if (err) return err; - if (acl_tbl->ports == 0) - acl_tbl->in_use = false; + if (block->ports == 0) + block->in_use = false; - new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw); - new_acl_tbl->in_use = true; - return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl); + new_block = dpaa2_switch_filter_block_get_unused(ethsw); + new_block->in_use = true; + return dpaa2_switch_port_acl_tbl_bind(port_priv, new_block); } static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev, @@ -1277,7 +1297,7 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev, { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; - struct dpaa2_switch_acl_tbl *acl_tbl; + struct dpaa2_switch_filter_block *filter_block; struct flow_block_cb *block_cb; bool register_block = false; int err; @@ -1287,24 +1307,24 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev, ethsw); if (!block_cb) { - /* If the ACL table is not already known, then this port must - * be the first to join it. In this case, we can just continue - * to use our private table + /* If the filter block is not already known, then this port + * must be the first to join it. In this case, we can just + * continue to use our private table */ - acl_tbl = port_priv->acl_tbl; + filter_block = port_priv->filter_block; block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig, - ethsw, acl_tbl, NULL); + ethsw, filter_block, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); register_block = true; } else { - acl_tbl = flow_block_cb_priv(block_cb); + filter_block = flow_block_cb_priv(block_cb); } flow_block_cb_incref(block_cb); - err = dpaa2_switch_port_block_bind(port_priv, acl_tbl); + err = dpaa2_switch_port_block_bind(port_priv, filter_block); if (err) goto err_block_bind; @@ -1327,7 +1347,7 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev, { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; - struct dpaa2_switch_acl_tbl *acl_tbl; + struct dpaa2_switch_filter_block *filter_block; struct flow_block_cb *block_cb; int err; @@ -1337,8 +1357,8 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev, if (!block_cb) return; - acl_tbl = flow_block_cb_priv(block_cb); - err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl); + filter_block = flow_block_cb_priv(block_cb); + err = dpaa2_switch_port_block_unbind(port_priv, filter_block); if (!err && !flow_block_cb_decref(block_cb)) { flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); @@ -1403,41 +1423,103 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev) return netdev->netdev_ops == &dpaa2_switch_port_ops; } -static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw) +static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) { - int i; + struct fsl_mc_device *dpsw_port_dev, *dpmac_dev; + struct dpaa2_mac *mac; + int err; - for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { - dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev); - dpaa2_switch_port_set_mac_addr(ethsw->ports[i]); + dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent); + dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx); + 
+ if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) + return PTR_ERR(dpmac_dev); + + if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + return 0; + + mac = kzalloc(sizeof(*mac), GFP_KERNEL); + if (!mac) + return -ENOMEM; + + mac->mc_dev = dpmac_dev; + mac->mc_io = port_priv->ethsw_data->mc_io; + mac->net_dev = port_priv->netdev; + + err = dpaa2_mac_open(mac); + if (err) + goto err_free_mac; + port_priv->mac = mac; + + if (dpaa2_switch_port_is_type_phy(port_priv)) { + err = dpaa2_mac_connect(mac); + if (err) { + netdev_err(port_priv->netdev, + "Error connecting to the MAC endpoint %pe\n", + ERR_PTR(err)); + goto err_close_mac; + } } + + return 0; + +err_close_mac: + dpaa2_mac_close(mac); + port_priv->mac = NULL; +err_free_mac: + kfree(mac); + return err; +} + +static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv) +{ + if (dpaa2_switch_port_is_type_phy(port_priv)) + dpaa2_mac_disconnect(port_priv->mac); + + if (!dpaa2_switch_port_has_mac(port_priv)) + return; + + dpaa2_mac_close(port_priv->mac); + kfree(port_priv->mac); + port_priv->mac = NULL; } static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) { struct device *dev = (struct device *)arg; struct ethsw_core *ethsw = dev_get_drvdata(dev); - - /* Mask the events and the if_id reserved bits to be cleared on read */ - u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000; - int err; + struct ethsw_port_priv *port_priv; + u32 status = ~0; + int err, if_id; err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, &status); if (err) { dev_err(dev, "Can't get irq status (err %d)\n", err); - - err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, - DPSW_IRQ_INDEX_IF, 0xFFFFFFFF); - if (err) - dev_err(dev, "Can't clear irq status (err %d)\n", err); goto out; } - if (status & DPSW_IRQ_EVENT_LINK_CHANGED) - dpaa2_switch_links_state_update(ethsw); + if_id = (status & 0xFFFF0000) >> 16; + port_priv = ethsw->ports[if_id]; + + if (status & DPSW_IRQ_EVENT_LINK_CHANGED) { + dpaa2_switch_port_link_state_update(port_priv->netdev); + dpaa2_switch_port_set_mac_addr(port_priv); + } + + if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) { + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_switch_port_disconnect_mac(port_priv); + else + dpaa2_switch_port_connect_mac(port_priv); + } out: + err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, status); + if (err) + dev_err(dev, "Can't clear irq status (err %d)\n", err); + return IRQ_HANDLED; } @@ -1889,8 +1971,12 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, return notifier_from_errno(err); } +static struct notifier_block dpaa2_switch_port_switchdev_nb; +static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb; + static int dpaa2_switch_port_bridge_join(struct net_device *netdev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; @@ -1906,8 +1992,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, other_port_priv = netdev_priv(other_dev); if (other_port_priv->ethsw_data != port_priv->ethsw_data) { - netdev_err(netdev, - "Interface from a different DPSW is in the bridge already!\n"); + NL_SET_ERR_MSG_MOD(extack, + "Interface from a different DPSW is in the bridge already"); return -EINVAL; } } @@ -1929,8 +2015,16 @@ static int dpaa2_switch_port_bridge_join(struct net_device 
*netdev, if (err) goto err_egress_flood; + err = switchdev_bridge_port_offload(netdev, netdev, NULL, + &dpaa2_switch_port_switchdev_nb, + &dpaa2_switch_port_switchdev_blocking_nb, + false, extack); + if (err) + goto err_switchdev_offload; + return 0; +err_switchdev_offload: err_egress_flood: dpaa2_switch_port_set_fdb(port_priv, NULL); return err; @@ -1956,6 +2050,13 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid); } +static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev) +{ + switchdev_bridge_port_unoffload(netdev, NULL, + &dpaa2_switch_port_switchdev_nb, + &dpaa2_switch_port_switchdev_blocking_nb); +} + static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); @@ -2029,6 +2130,28 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net return 0; } +static int +dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, + struct net_device *upper_dev, + struct netlink_ext_ack *extack) +{ + int err; + + if (!br_vlan_enabled(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge"); + return -EOPNOTSUPP; + } + + err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot join a bridge while VLAN uppers are present"); + return err; + } + + return 0; +} + static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2049,25 +2172,23 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, if (!netif_is_bridge_master(upper_dev)) break; - if (!br_vlan_enabled(upper_dev)) { - NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge"); - err = -EOPNOTSUPP; + err = dpaa2_switch_prechangeupper_sanity_checks(netdev, + upper_dev, + extack); + if (err) goto out; - } - err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Cannot join a bridge while VLAN uppers are present"); - goto out; - } + if (!info->linking) + dpaa2_switch_port_pre_bridge_leave(netdev); break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (netif_is_bridge_master(upper_dev)) { if (info->linking) - err = dpaa2_switch_port_bridge_join(netdev, upper_dev); + err = dpaa2_switch_port_bridge_join(netdev, + upper_dev, + extack); else err = dpaa2_switch_port_bridge_leave(netdev); } @@ -2952,7 +3073,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv, acl_entry.cfg.precedence = 0; acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; - return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry); + return dpaa2_switch_acl_entry_add(port_priv->filter_block, &acl_entry); } static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) @@ -2965,7 +3086,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) }; struct net_device *netdev = port_priv->netdev; struct ethsw_core *ethsw = port_priv->ethsw_data; - struct dpaa2_switch_acl_tbl *acl_tbl; + struct dpaa2_switch_filter_block *filter_block; struct dpsw_fdb_cfg fdb_cfg = {0}; struct dpsw_if_attr dpsw_if_attr; struct dpaa2_switch_fdb *fdb; @@ -3020,14 +3141,15 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) return err; } - acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw); - acl_tbl->ethsw = ethsw; - acl_tbl->id = acl_tbl_id; - acl_tbl->in_use = true; - 
acl_tbl->num_rules = 0; - INIT_LIST_HEAD(&acl_tbl->entries); + filter_block = dpaa2_switch_filter_block_get_unused(ethsw); + filter_block->ethsw = ethsw; + filter_block->acl_id = acl_tbl_id; + filter_block->in_use = true; + filter_block->num_acl_rules = 0; + INIT_LIST_HEAD(&filter_block->acl_entries); + INIT_LIST_HEAD(&filter_block->mirror_entries); - err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl); + err = dpaa2_switch_port_acl_tbl_bind(port_priv, filter_block); if (err) return err; @@ -3077,11 +3199,12 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { port_priv = ethsw->ports[i]; unregister_netdev(port_priv->netdev); + dpaa2_switch_port_disconnect_mac(port_priv); free_netdev(port_priv->netdev); } kfree(ethsw->fdbs); - kfree(ethsw->acls); + kfree(ethsw->filter_blocks); kfree(ethsw->ports); dpaa2_switch_takedown(sw_dev); @@ -3156,6 +3279,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, goto err_port_probe; port_priv->learn_ena = false; + err = dpaa2_switch_port_connect_mac(port_priv); + if (err) + goto err_port_probe; + return 0; err_port_probe: @@ -3209,9 +3336,10 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) goto err_free_ports; } - ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls), - GFP_KERNEL); - if (!ethsw->acls) { + ethsw->filter_blocks = kcalloc(ethsw->sw_attr.num_ifs, + sizeof(*ethsw->filter_blocks), + GFP_KERNEL); + if (!ethsw->filter_blocks) { err = -ENOMEM; goto err_free_fdbs; } @@ -3231,17 +3359,16 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) ðsw->fq[i].napi, dpaa2_switch_poll, NAPI_POLL_WEIGHT); - err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); - if (err) { - dev_err(ethsw->dev, "dpsw_enable err %d\n", err); - goto err_free_netdev; - } - /* Setup IRQs */ err = dpaa2_switch_setup_irqs(sw_dev); if (err) goto err_stop; + /* By convention, if the mirror port is equal to the number of switch + * interfaces, then mirroring of any kind is disabled. 
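
The convention above works because valid interface indices run from 0 to num_ifs - 1, so num_ifs itself can never name a real port and doubles as the "no mirror port configured" sentinel. A stand-alone C model of the check (illustrative only; the names are invented):

#include <stdbool.h>
#include <stdio.h>

struct sw_model {
        unsigned int num_ifs;      /* valid ports are 0 .. num_ifs - 1 */
        unsigned int mirror_port;  /* == num_ifs means "mirroring disabled" */
};

static bool mirroring_enabled(const struct sw_model *sw)
{
        return sw->mirror_port != sw->num_ifs;
}

int main(void)
{
        struct sw_model sw = { .num_ifs = 8, .mirror_port = 8 };

        printf("%d\n", mirroring_enabled(&sw));  /* 0: disabled */
        sw.mirror_port = 3;
        printf("%d\n", mirroring_enabled(&sw));  /* 1: mirror to if 3 */
        return 0;
}
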
+ */ + ethsw->mirror_port = ethsw->sw_attr.num_ifs; + /* Register the netdev only when the entire setup is done and the * switch port interfaces are ready to receive traffic */ @@ -3264,7 +3391,7 @@ err_stop: err_free_netdev: for (i--; i >= 0; i--) free_netdev(ethsw->ports[i]->netdev); - kfree(ethsw->acls); + kfree(ethsw->filter_blocks); err_free_fdbs: kfree(ethsw->fdbs); err_free_ports: diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index bdef71f234cb..0002dca4d417 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -21,6 +21,7 @@ #include <net/pkt_cls.h> #include <soc/fsl/dpaa2-io.h> +#include "dpaa2-mac.h" #include "dpsw.h" /* Number of IRQs supported */ @@ -113,20 +114,29 @@ struct dpaa2_switch_acl_entry { struct dpsw_acl_key key; }; -struct dpaa2_switch_acl_tbl { - struct list_head entries; +struct dpaa2_switch_mirror_entry { + struct list_head list; + struct dpsw_reflection_cfg cfg; + unsigned long cookie; + u16 if_id; +}; + +struct dpaa2_switch_filter_block { struct ethsw_core *ethsw; u64 ports; - - u16 id; - u8 num_rules; bool in_use; + + struct list_head acl_entries; + u16 acl_id; + u8 num_acl_rules; + + struct list_head mirror_entries; }; static inline bool -dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl) +dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_filter_block *filter_block) { - if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >= + if ((filter_block->num_acl_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) return true; return false; @@ -149,7 +159,8 @@ struct ethsw_port_priv { bool ucast_flood; bool learn_ena; - struct dpaa2_switch_acl_tbl *acl_tbl; + struct dpaa2_switch_filter_block *filter_block; + struct dpaa2_mac *mac; }; /* Switch data */ @@ -175,7 +186,8 @@ struct ethsw_core { int napi_users; struct dpaa2_switch_fdb *fdbs; - struct dpaa2_switch_acl_tbl *acls; + struct dpaa2_switch_filter_block *filter_blocks; + u16 mirror_port; }; static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw, @@ -215,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) return true; } +static inline bool +dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv) +{ + if (port_priv->mac && + (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || + port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) + return true; + + return false; +} + +static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv) +{ + return port_priv->mac ? 
true : false; +} + bool dpaa2_switch_port_dev_check(const struct net_device *netdev); int dpaa2_switch_port_vlans_add(struct net_device *netdev, @@ -229,18 +257,24 @@ typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv, /* TC offload */ -int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block, struct flow_cls_offload *cls); -int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block, struct flow_cls_offload *cls); -int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block, struct tc_cls_matchall_offload *cls); -int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block, struct tc_cls_matchall_offload *cls); -int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *block, struct dpaa2_switch_acl_entry *entry); + +int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block, + struct ethsw_port_priv *port_priv); + +int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block, + struct ethsw_port_priv *port_priv); #endif /* __ETHSW_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h index cb13e740f72b..397d55f2bd99 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h @@ -39,11 +39,16 @@ #define DPSW_CMDID_GET_IRQ_STATUS DPSW_CMD_ID(0x016) #define DPSW_CMDID_CLEAR_IRQ_STATUS DPSW_CMD_ID(0x017) +#define DPSW_CMDID_SET_REFLECTION_IF DPSW_CMD_ID(0x022) + #define DPSW_CMDID_IF_SET_TCI DPSW_CMD_ID(0x030) #define DPSW_CMDID_IF_SET_STP DPSW_CMD_ID(0x031) #define DPSW_CMDID_IF_GET_COUNTER DPSW_CMD_V2(0x034) +#define DPSW_CMDID_IF_ADD_REFLECTION DPSW_CMD_ID(0x037) +#define DPSW_CMDID_IF_REMOVE_REFLECTION DPSW_CMD_ID(0x038) + #define DPSW_CMDID_IF_ENABLE DPSW_CMD_ID(0x03D) #define DPSW_CMDID_IF_DISABLE DPSW_CMD_ID(0x03E) @@ -533,5 +538,19 @@ struct dpsw_cmd_acl_entry { __le64 pad2[4]; __le64 key_iova; }; + +struct dpsw_cmd_set_reflection_if { + __le16 if_id; +}; + +#define DPSW_FILTER_SHIFT 0 +#define DPSW_FILTER_SIZE 2 + +struct dpsw_cmd_if_reflection { + __le16 if_id; + __le16 vlan_id; + /* only 2 bits from the LSB */ + u8 filter; +}; #pragma pack(pop) #endif /* __FSL_DPSW_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c index 6352d6d1ecba..ab921d75deb2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c @@ -1579,3 +1579,83 @@ int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, /* send command to mc*/ return mc_send_command(mc_io, &cmd); } + +/** + * dpsw_set_reflection_if() - Set target interface for mirrored traffic + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Id + * + * Only one mirroring destination is allowed per switch + * + * Return: Completion status. '0' on Success; Error code otherwise.
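
For context on the reflection commands below: dpsw_set_field() packs the 2-bit filter selector (DPSW_FILTER_SHIFT/DPSW_FILTER_SIZE above) into the low bits of the command byte. A stand-alone shift-and-mask model of that packing (illustrative only; set_field() is an invented helper, not the driver macro):

#include <stdint.h>
#include <stdio.h>

#define FILTER_SHIFT    0
#define FILTER_SIZE     2

/* generic "insert val into a bit field" helper, invented for illustration */
static uint8_t set_field(uint8_t word, unsigned int shift,
                         unsigned int size, uint8_t val)
{
        uint8_t mask = ((1u << size) - 1) << shift;

        return (word & ~mask) | ((val << shift) & mask);
}

int main(void)
{
        uint8_t filter = 0;

        /* 1 would select the VLAN-scoped ingress filter */
        filter = set_field(filter, FILTER_SHIFT, FILTER_SIZE, 1);
        printf("0x%02x\n", filter);     /* 0x01 */
        return 0;
}
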
+ */ +int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id) +{ + struct dpsw_cmd_set_reflection_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_REFLECTION_IF, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_set_reflection_if *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_add_reflection() - Setup mirroring rule + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @cfg: Reflection configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, const struct dpsw_reflection_cfg *cfg) +{ + struct dpsw_cmd_if_reflection *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ADD_REFLECTION, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id); + dpsw_set_field(cmd_params->filter, FILTER, cfg->filter); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_remove_reflection() - Remove mirroring rule + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @cfg: Reflection configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, const struct dpsw_reflection_cfg *cfg) +{ + struct dpsw_cmd_if_reflection *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_REMOVE_REFLECTION, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id); + dpsw_set_field(cmd_params->filter, FILTER, cfg->filter); + + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h index 5ef221a25b02..b90bd363f47a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h @@ -99,6 +99,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); #define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001 /** + * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint + */ +#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002 + +/** * struct dpsw_irq_cfg - IRQ configuration * @addr: Address that must be written to signal a message-based interrupt * @val: Value to write into irq_addr address @@ -752,4 +757,35 @@ int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); + +/** + * enum dpsw_reflection_filter - Filter type for frames to be reflected + * @DPSW_REFLECTION_FILTER_INGRESS_ALL: Reflect all frames + * @DPSW_REFLECTION_FILTER_INGRESS_VLAN: Reflect only frames that belong to + * the particular VLAN defined by the vlan_id parameter + * + */ +enum dpsw_reflection_filter { + DPSW_REFLECTION_FILTER_INGRESS_ALL = 0, + DPSW_REFLECTION_FILTER_INGRESS_VLAN = 1 +}; + +/** + * struct
dpsw_reflection_cfg - Structure representing the mirroring config + * @filter: Filter type for frames to be mirrored + * @vlan_id: VLAN ID to mirror; valid only when the filter type is DPSW_REFLECTION_FILTER_INGRESS_VLAN + */ +struct dpsw_reflection_cfg { + enum dpsw_reflection_filter filter; + u16 vlan_id; +}; + +int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id); + +int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, const struct dpsw_reflection_cfg *cfg); + +int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, const struct dpsw_reflection_cfg *cfg); #endif /* __FSL_DPSW_H */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index c84f6c226743..60d94e0a07d6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -735,7 +735,7 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_vf_vlan = enetc_pf_set_vf_vlan, .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, .ndo_set_features = enetc_pf_set_features, - .ndo_do_ioctl = enetc_ioctl, + .ndo_eth_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, .ndo_bpf = enetc_setup_bpf, .ndo_xdp_xmit = enetc_xdp_xmit, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 03090ba7e226..1a9d1e8b772c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -99,7 +99,7 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_get_stats = enetc_get_stats, .ndo_set_mac_address = enetc_vf_set_mac_addr, .ndo_set_features = enetc_vf_set_features, - .ndo_do_ioctl = enetc_ioctl, + .ndo_eth_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, }; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 2e002e4b4b4a..ae3259164395 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -77,6 +77,8 @@ #define FEC_R_DES_ACTIVE_2 0x1e8 /* Rx descriptor active for ring 2 */ #define FEC_X_DES_ACTIVE_2 0x1ec /* Tx descriptor active for ring 2 */ #define FEC_QOS_SCHEME 0x1f0 /* Set multi queues Qos scheme */ +#define FEC_LPI_SLEEP 0x1f4 /* Set IEEE802.3az LPI Sleep Ts time */ +#define FEC_LPI_WAKE 0x1f8 /* Set IEEE802.3az LPI Wake Tw time */ #define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */ #define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */ @@ -379,6 +381,9 @@ struct bufdesc_ex { #define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF) #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF)) +#define FEC_ENET_TXC_DLY ((uint)0x00010000) +#define FEC_ENET_RXC_DLY ((uint)0x00020000) + /* ENET interrupt coalescing macro define */ #define FEC_ITR_CLK_SEL (0x1 << 30) #define FEC_ITR_EN (0x1 << 31) @@ -472,6 +477,19 @@ struct bufdesc_ex { */ #define FEC_QUIRK_HAS_MULTI_QUEUES (1 << 19) +/* The i.MX8MQ ENET IP version adds a new feature to support the IEEE 802.3az EEE + * standard. For transmission, the MAC supplies two user registers to set the + * Sleep (TS) and Wake (TW) times. + */ +#define FEC_QUIRK_HAS_EEE (1 << 20) + +/* The i.MX8QM ENET IP version adds a new feature to generate delayed TXC/RXC + * as an alternative option to make sure it works well with various PHYs. + * For the implementation of the delayed clock, ENET takes synchronized 250MHz + * clocks to generate the 2ns delay.
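
The Sleep (TS) and Wake (TW) registers mentioned above are programmed in tx reference clock (clk_ref) cycles; fec_enet_us_to_tx_cycle() later in this patch converts the ethtool tx_lpi_timer value from microseconds as us * (clk_ref_rate / 1000) / 1000. A stand-alone check of that arithmetic (illustrative only; the 125 MHz rate is just an example):

#include <stdio.h>

/* same arithmetic as the driver's fec_enet_us_to_tx_cycle() */
static unsigned int us_to_tx_cycles(unsigned int us, unsigned long clk_ref_rate)
{
        return us * (clk_ref_rate / 1000) / 1000;
}

int main(void)
{
        /* 1000 us at a 125 MHz reference clock -> 125000 cycles */
        printf("%u\n", us_to_tx_cycles(1000, 125000000UL));
        return 0;
}
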
+ */ +#define FEC_QUIRK_DELAYED_CLKS_SUPPORT (1 << 21) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ @@ -528,6 +546,7 @@ struct fec_enet_private { struct clk *clk_ref; struct clk *clk_enet_out; struct clk *clk_ptp; + struct clk *clk_2x_txclk; bool ptp_clk_on; struct mutex ptp_clk_mutex; @@ -550,6 +569,8 @@ struct fec_enet_private { uint phy_speed; phy_interface_t phy_interface; struct device_node *phy_node; + bool rgmii_txc_dly; + bool rgmii_rxc_dly; int link; int full_duplex; int speed; @@ -589,6 +610,10 @@ struct fec_enet_private { unsigned int tx_time_itr; unsigned int itr_clk_rate; + /* tx lpi eee mode */ + struct ethtool_eee eee; + unsigned int clk_ref_rate; + u32 rx_copybreak; /* ptp clock period in ns*/ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 7e4c4980ced7..fdff37b87de7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -135,6 +135,26 @@ static const struct fec_devinfo fec_imx6ul_info = { FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII, }; +static const struct fec_devinfo fec_imx8mq_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | + FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | + FEC_QUIRK_HAS_EEE, +}; + +static const struct fec_devinfo fec_imx8qm_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | + FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | + FEC_QUIRK_DELAYED_CLKS_SUPPORT, +}; + static struct platform_device_id fec_devtype[] = { { /* keep it for coldfire */ @@ -162,6 +182,12 @@ static struct platform_device_id fec_devtype[] = { .name = "imx6ul-fec", .driver_data = (kernel_ulong_t)&fec_imx6ul_info, }, { + .name = "imx8mq-fec", + .driver_data = (kernel_ulong_t)&fec_imx8mq_info, + }, { + .name = "imx8qm-fec", + .driver_data = (kernel_ulong_t)&fec_imx8qm_info, + }, { /* sentinel */ } }; @@ -175,6 +201,8 @@ enum imx_fec_type { MVF600_FEC, IMX6SX_FEC, IMX6UL_FEC, + IMX8MQ_FEC, + IMX8QM_FEC, }; static const struct of_device_id fec_dt_ids[] = { @@ -185,6 +213,8 @@ static const struct of_device_id fec_dt_ids[] = { { .compatible = "fsl,mvf600-fec", .data = &fec_devtype[MVF600_FEC], }, { .compatible = "fsl,imx6sx-fec", .data = &fec_devtype[IMX6SX_FEC], }, { .compatible = "fsl,imx6ul-fec", .data = &fec_devtype[IMX6UL_FEC], }, + { .compatible = "fsl,imx8mq-fec", .data = &fec_devtype[IMX8MQ_FEC], }, + { .compatible = "fsl,imx8qm-fec", .data = &fec_devtype[IMX8QM_FEC], }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fec_dt_ids); @@ -1107,6 +1137,13 @@ fec_restart(struct net_device *ndev) if (fep->bufdesc_ex) ecntl |= (1 << 4); + if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT && + fep->rgmii_txc_dly) + ecntl |= FEC_ENET_TXC_DLY; + if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT && + fep->rgmii_rxc_dly) + ecntl |= FEC_ENET_RXC_DLY; + #ifndef CONFIG_M5272 /* Enable the MIB statistic event counters */ writel(0 << 31, fep->hwp + FEC_MIB_CTRLSTAT); @@ -1970,6 +2007,10 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) if (ret) goto failed_clk_ref; + ret = clk_prepare_enable(fep->clk_2x_txclk); + if (ret) + goto 
failed_clk_2x_txclk; + fec_enet_phy_reset_after_clk_enable(ndev); } else { clk_disable_unprepare(fep->clk_enet_out); @@ -1980,10 +2021,14 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) mutex_unlock(&fep->ptp_clk_mutex); } clk_disable_unprepare(fep->clk_ref); + clk_disable_unprepare(fep->clk_2x_txclk); } return 0; +failed_clk_2x_txclk: + if (fep->clk_ref) + clk_disable_unprepare(fep->clk_ref); failed_clk_ref: if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1997,6 +2042,34 @@ failed_clk_ptp: return ret; } +static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep, + struct device_node *np) +{ + u32 rgmii_tx_delay, rgmii_rx_delay; + + /* For rgmii tx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) { + if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_tx_delay == 2000) { + fep->rgmii_txc_dly = true; + } + } + + /* For rgmii rx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) { + if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_rx_delay == 2000) { + fep->rgmii_rxc_dly = true; + } + } + + return 0; +} + static int fec_enet_mii_probe(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -2692,6 +2765,92 @@ static int fec_enet_set_tunable(struct net_device *netdev, return ret; } +/* The LPI Sleep Ts count is based on the tx clock (clk_ref). + * The LPI sleep count value = X us / (cycle_ns). + */ +static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + return us * (fep->clk_ref_rate / 1000) / 1000; +} + +static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct ethtool_eee *p = &fep->eee; + unsigned int sleep_cycle, wake_cycle; + int ret = 0; + + if (enable) { + ret = phy_init_eee(ndev->phydev, 0); + if (ret) + return ret; + + sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer); + wake_cycle = sleep_cycle; + } else { + sleep_cycle = 0; + wake_cycle = 0; + } + + p->tx_lpi_enabled = enable; + p->eee_enabled = enable; + p->eee_active = enable; + + writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP); + writel(wake_cycle, fep->hwp + FEC_LPI_WAKE); + + return 0; +} + +static int +fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct ethtool_eee *p = &fep->eee; + + if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) + return -EOPNOTSUPP; + + if (!netif_running(ndev)) + return -ENETDOWN; + + edata->eee_enabled = p->eee_enabled; + edata->eee_active = p->eee_active; + edata->tx_lpi_timer = p->tx_lpi_timer; + edata->tx_lpi_enabled = p->tx_lpi_enabled; + + return phy_ethtool_get_eee(ndev->phydev, edata); +} + +static int +fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct ethtool_eee *p = &fep->eee; + int ret = 0; + + if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) + return -EOPNOTSUPP; + + if (!netif_running(ndev)) + return -ENETDOWN; + + p->tx_lpi_timer = edata->tx_lpi_timer; + + if (!edata->eee_enabled || !edata->tx_lpi_enabled || + !edata->tx_lpi_timer) + ret = 
fec_enet_eee_mode_set(ndev, false); + else + ret = fec_enet_eee_mode_set(ndev, true); + + if (ret) + return ret; + + return phy_ethtool_set_eee(ndev->phydev, edata); +} + static void fec_enet_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { @@ -2752,6 +2911,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .set_tunable = fec_enet_set_tunable, .get_wol = fec_enet_get_wol, .set_wol = fec_enet_set_wol, + .get_eee = fec_enet_get_eee, + .set_eee = fec_enet_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .self_test = net_selftest, @@ -3280,7 +3441,7 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = fec_timeout, .ndo_set_mac_address = fec_set_mac_address, - .ndo_do_ioctl = fec_enet_ioctl, + .ndo_eth_ioctl = fec_enet_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = fec_poll_controller, #endif @@ -3666,6 +3827,10 @@ fec_probe(struct platform_device *pdev) fep->phy_interface = interface; } + ret = fec_enet_parse_rgmii_delay(fep, np); + if (ret) + goto failed_rgmii_delay; + fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(fep->clk_ipg)) { ret = PTR_ERR(fep->clk_ipg); @@ -3692,6 +3857,14 @@ fec_probe(struct platform_device *pdev) fep->clk_ref = devm_clk_get(&pdev->dev, "enet_clk_ref"); if (IS_ERR(fep->clk_ref)) fep->clk_ref = NULL; + fep->clk_ref_rate = clk_get_rate(fep->clk_ref); + + /* clk_2x_txclk is optional, depends on board */ + if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) { + fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk"); + if (IS_ERR(fep->clk_2x_txclk)) + fep->clk_2x_txclk = NULL; + } fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX; fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp"); @@ -3809,6 +3982,7 @@ failed_clk_ahb: failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: +failed_rgmii_delay: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(phy_node); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 02c47658a215..73ff359a15f1 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -792,7 +792,7 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = { .ndo_set_rx_mode = mpc52xx_fec_set_multicast_list, .ndo_set_mac_address = mpc52xx_fec_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = mpc52xx_fec_tx_timeout, .ndo_get_stats = mpc52xx_fec_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 6ee325ad35c5..2db6e38a772e 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -900,7 +900,7 @@ static const struct net_device_ops fs_enet_netdev_ops = { .ndo_start_xmit = fs_enet_start_xmit, .ndo_tx_timeout = fs_timeout, .ndo_set_rx_mode = fs_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 9646483137c4..af6ad94bf24a 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ 
b/drivers/net/ethernet/freescale/gianfar.c @@ -3184,7 +3184,7 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_set_features = gfar_set_features, .ndo_set_rx_mode = gfar_set_multi, .ndo_tx_timeout = gfar_timeout, - .ndo_do_ioctl = gfar_ioctl, + .ndo_eth_ioctl = gfar_ioctl, .ndo_get_stats64 = gfar_get_stats64, .ndo_change_carrier = fixed_phy_change_carrier, .ndo_set_mac_address = gfar_set_mac_addr, diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 0acfafb73db1..3eb288d10b0c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3516,7 +3516,7 @@ static const struct net_device_ops ucc_geth_netdev_ops = { .ndo_set_mac_address = ucc_geth_set_mac_addr, .ndo_set_rx_mode = ucc_geth_set_multi, .ndo_tx_timeout = ucc_geth_timeout, - .ndo_do_ioctl = ucc_geth_ioctl, + .ndo_eth_ioctl = ucc_geth_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ucc_netpoll, #endif diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 5bb56b454541..f089d33dd48e 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -322,7 +322,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, tail = ioread32be(&priv->reg_bar0->adminq_event_counter); // Check if next command will overflow the buffer. - if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == + (tail & priv->adminq_mask)) { int err; // Flush existing commands to make room. @@ -332,7 +333,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, // Retry. tail = ioread32be(&priv->reg_bar0->adminq_event_counter); - if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == + (tail & priv->adminq_mask)) { // This should never happen. We just flushed the // command queue so there should be enough space. return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index bb062b02fb85..094e4a37a295 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -90,6 +90,7 @@ config HNS_ENET config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" depends on PCI + select NET_DEVLINK help This selects the framework support for Hisilicon Network Subsystem 3. 
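
For context on the NET_DEVLINK select above: the hclge/hclgevf devlink support added below reports the firmware version (e.g. queryable with "devlink dev info") as four bytes unpacked from one 32-bit word. A stand-alone model of the unpacking (illustrative only; the plain shifts stand in for the HNAE3_FW_VERSION_BYTEn_* mask/shift definitions):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t fw_version = 0x01020304;  /* example packed version word */
        char buf[32];

        /* one decimal component per byte, most significant first */
        snprintf(buf, sizeof(buf), "%u.%u.%u.%u",
                 (fw_version >> 24) & 0xff, (fw_version >> 16) & 0xff,
                 (fw_version >> 8) & 0xff, fw_version & 0xff);
        printf("%s\n", buf);    /* prints "1.2.3.4" */
        return 0;
}
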
This layer facilitates clients like ENET, RoCE and user-space ethernet diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 3c4db4a6b431..22bf914f2dbd 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -685,7 +685,7 @@ static const struct net_device_ops hisi_femac_netdev_ops = { .ndo_open = hisi_femac_net_open, .ndo_stop = hisi_femac_net_close, .ndo_start_xmit = hisi_femac_net_xmit, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = hisi_femac_set_mac_address, .ndo_set_rx_mode = hisi_femac_net_set_rx_mode, }; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index ad534f9e41ab..343c605c4be8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1945,7 +1945,7 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_tx_timeout = hns_nic_net_timeout, .ndo_set_mac_address = hns_nic_net_set_mac_address, .ndo_change_mtu = hns_nic_change_mtu, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cdb5f14fb6bc..cb8d5da3654f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2852,7 +2852,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_start_xmit = hns3_nic_net_xmit, .ndo_tx_timeout = hns3_nic_net_timeout, .ndo_set_mac_address = hns3_nic_net_set_mac_address, - .ndo_do_ioctl = hns3_nic_do_ioctl, + .ndo_eth_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, .ndo_features_check = hns3_features_check, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index a685392dbfe9..d1bf5c4c0abb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -7,6 +7,6 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 ccflags-y += -I $(srctree)/$(src) obj-$(CONFIG_HNS3_HCLGE) += hclge.o -hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o +hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o hclge_devlink.o hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c new file mode 100644 index 000000000000..06d29945d4e1 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2021 Hisilicon Limited. 
*/ + +#include <net/devlink.h> + +#include "hclge_devlink.h" + +static int hclge_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ +#define HCLGE_DEVLINK_FW_STRING_LEN 32 + struct hclge_devlink_priv *priv = devlink_priv(devlink); + char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; + struct hclge_dev *hdev = priv->hdev; + int ret; + + ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (ret) + return ret; + + snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, + HNAE3_FW_VERSION_BYTE3_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, + HNAE3_FW_VERSION_BYTE2_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, + HNAE3_FW_VERSION_BYTE1_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, + HNAE3_FW_VERSION_BYTE0_SHIFT)); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct hclge_devlink_priv *priv = devlink_priv(devlink); + struct hclge_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->vport->nic; + struct pci_dev *pdev = hdev->pdev; + int ret; + + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) { + dev_err(&pdev->dev, "reset is handling\n"); + return -EBUSY; + } + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, + HNAE3_UNINIT_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static int hclge_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct hclge_devlink_priv *priv = devlink_priv(devlink); + struct hclge_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->vport->nic; + int ret; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops hclge_devlink_ops = { + .info_get = hclge_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = hclge_devlink_reload_down, + .reload_up = hclge_devlink_reload_up, +}; + +int hclge_devlink_init(struct hclge_dev *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct hclge_devlink_priv *priv; + struct devlink *devlink; + int ret; + + devlink = devlink_alloc(&hclge_devlink_ops, + sizeof(struct hclge_devlink_priv)); + if (!devlink) + return -ENOMEM; + + priv = devlink_priv(devlink); + priv->hdev = hdev; + + ret = devlink_register(devlink, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", + ret); + goto out_reg_fail; + } + + hdev->devlink = devlink; + + devlink_reload_enable(devlink); + + return 0; + +out_reg_fail: + devlink_free(devlink); + return ret; +} + +void hclge_devlink_uninit(struct hclge_dev *hdev) +{ + 
struct devlink *devlink = hdev->devlink; + + if (!devlink) + return; + + devlink_reload_disable(devlink); + + devlink_unregister(devlink); + + devlink_free(devlink); + + hdev->devlink = NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h new file mode 100644 index 000000000000..918be04507a5 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2021 Hisilicon Limited. */ + +#ifndef __HCLGE_DEVLINK_H +#define __HCLGE_DEVLINK_H + +#include "hclge_main.h" + +struct hclge_devlink_priv { + struct hclge_dev *hdev; +}; + +int hclge_devlink_init(struct hclge_dev *hdev); +void hclge_devlink_uninit(struct hclge_dev *hdev); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ebeaf12e409b..f15d76ec0068 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -23,6 +23,7 @@ #include "hclge_tm.h" #include "hclge_err.h" #include "hnae3.h" +#include "hclge_devlink.h" #define HCLGE_NAME "hclge" #define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) @@ -11482,10 +11483,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto out; + ret = hclge_devlink_init(hdev); + if (ret) + goto err_pci_uninit; + /* Firmware command queue initialize */ ret = hclge_cmd_queue_init(hdev); if (ret) - goto err_pci_uninit; + goto err_devlink_uninit; /* Firmware command initialize */ ret = hclge_cmd_init(hdev); @@ -11658,6 +11663,8 @@ err_msi_uninit: pci_free_irq_vectors(pdev); err_cmd_uninit: hclge_cmd_uninit(hdev); +err_devlink_uninit: + hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.io_base); pci_clear_master(pdev); @@ -12048,6 +12055,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); + hclge_devlink_uninit(hdev); hclge_pci_uninit(hdev); mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_vlan_table(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3d3352491dba..cc31b12904ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -8,6 +8,7 @@ #include <linux/phy.h> #include <linux/if_vlan.h> #include <linux/kfifo.h> +#include <net/devlink.h> #include "hclge_cmd.h" #include "hclge_ptp.h" @@ -943,6 +944,7 @@ struct hclge_dev { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; struct hclge_ptp *ptp; + struct devlink *devlink; }; /* VPort level vlan tag configuration for TX direction */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index 2c26ea607a53..51ff7d86ee90 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -7,4 +7,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 ccflags-y += -I $(srctree)/$(src) obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o +hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o hclgevf_devlink.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c new file mode 100644 index 000000000000..21a45279fd99 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2021 Hisilicon Limited. */ + +#include <net/devlink.h> + +#include "hclgevf_devlink.h" + +static int hclgevf_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ +#define HCLGEVF_DEVLINK_FW_STRING_LEN 32 + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN]; + struct hclgevf_dev *hdev = priv->hdev; + int ret; + + ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (ret) + return ret; + + snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, + HNAE3_FW_VERSION_BYTE3_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, + HNAE3_FW_VERSION_BYTE2_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, + HNAE3_FW_VERSION_BYTE1_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, + HNAE3_FW_VERSION_BYTE0_SHIFT)); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int hclgevf_devlink_reload_down(struct devlink *devlink, + bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + struct hclgevf_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->nic; + struct pci_dev *pdev = hdev->pdev; + int ret; + + if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) { + dev_err(&pdev->dev, "reset is handling\n"); + return -EBUSY; + } + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, + HNAE3_UNINIT_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static int hclgevf_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + struct hclgevf_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->nic; + int ret; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops hclgevf_devlink_ops = { + .info_get = hclgevf_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = hclgevf_devlink_reload_down, + .reload_up = hclgevf_devlink_reload_up, +}; + +int hclgevf_devlink_init(struct hclgevf_dev *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct hclgevf_devlink_priv *priv; + struct devlink *devlink; + int ret; + + devlink = devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv)); + if (!devlink) + return -ENOMEM; + + priv = devlink_priv(devlink); + priv->hdev = hdev; + + ret = 
devlink_register(devlink, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", + ret); + goto out_reg_fail; + } + + hdev->devlink = devlink; + + devlink_reload_enable(devlink); + + return 0; + +out_reg_fail: + devlink_free(devlink); + return ret; +} + +void hclgevf_devlink_uninit(struct hclgevf_dev *hdev) +{ + struct devlink *devlink = hdev->devlink; + + if (!devlink) + return; + + devlink_reload_disable(devlink); + + devlink_unregister(devlink); + + devlink_free(devlink); + + hdev->devlink = NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h new file mode 100644 index 000000000000..e09ea3d8a963 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2021 Hisilicon Limited. */ + +#ifndef __HCLGEVF_DEVLINK_H +#define __HCLGEVF_DEVLINK_H + +#include "hclgevf_main.h" + +struct hclgevf_devlink_priv { + struct hclgevf_dev *hdev; +}; + +int hclgevf_devlink_init(struct hclgevf_dev *hdev); +void hclgevf_devlink_uninit(struct hclgevf_dev *hdev); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8784d61e833f..3a19f08bfff3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -8,6 +8,7 @@ #include "hclgevf_main.h" #include "hclge_mbx.h" #include "hnae3.h" +#include "hclgevf_devlink.h" #define HCLGEVF_NAME "hclgevf" @@ -3337,6 +3338,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) return ret; + ret = hclgevf_devlink_init(hdev); + if (ret) + goto err_devlink_init; + ret = hclgevf_cmd_queue_init(hdev); if (ret) goto err_cmd_queue_init; @@ -3441,6 +3446,8 @@ err_misc_irq_init: err_cmd_init: hclgevf_cmd_uninit(hdev); err_cmd_queue_init: + hclgevf_devlink_uninit(hdev); +err_devlink_init: hclgevf_pci_uninit(hdev); clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state); return ret; @@ -3462,6 +3469,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) } hclgevf_cmd_uninit(hdev); + hclgevf_devlink_uninit(hdev); hclgevf_pci_uninit(hdev); hclgevf_uninit_mac_list(hdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index d7d02848d674..6f222a3a0bf2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -6,6 +6,7 @@ #include <linux/fs.h> #include <linux/if_vlan.h> #include <linux/types.h> +#include <net/devlink.h> #include "hclge_mbx.h" #include "hclgevf_cmd.h" #include "hnae3.h" @@ -330,6 +331,8 @@ struct hclgevf_dev { u32 flag; unsigned long serv_processed_cnt; unsigned long last_serv_processed; + + struct devlink *devlink; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index fc8c7cd67471..b8a40146b895 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str) add, add + 6, add, add[12], add[13], str); } -static int io = 0x300; -static int irq = 10; - static const struct net_device_ops i596_netdev_ops = { .ndo_open = i596_open, .ndo_stop = i596_close, @@ -1123,7 +1120,7 @@ static const struct 
net_device_ops i596_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -struct net_device * __init i82596_probe(int unit) +static struct net_device * __init i82596_probe(void) { struct net_device *dev; int i; @@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } else { - dev->base_addr = io; - dev->irq = irq; - } - #ifdef ENABLE_MVME16x_NET if (MACH_IS_MVME16x) { if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) { @@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev) } } -#ifdef MODULE static struct net_device *dev_82596; static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "i82596 debug mask"); -int __init init_module(void) +static int __init i82596_init(void) { if (debug >= 0) i596_debug = debug; - dev_82596 = i82596_probe(-1); + dev_82596 = i82596_probe(); return PTR_ERR_OR_ZERO(dev_82596); } +module_init(i82596_init); -void __exit cleanup_module(void) +static void __exit i82596_cleanup(void) { unregister_netdev(dev_82596); #ifdef __mc68000__ @@ -1544,5 +1533,4 @@ void __exit cleanup_module(void) free_page ((u32)(dev_82596->mem_start)); free_netdev(dev_82596); } - -#endif /* MODULE */ +module_exit(i82596_cleanup); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 4564ee02c95f..893e0ddcb611 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */ static int fifo=0x8; /* don't change */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev) memset((char *)p->scb,0,sizeof(struct scb_struct)); } -struct net_device * __init sun3_82586_probe(int unit) +static int __init sun3_82586_probe(void) { struct net_device *dev; unsigned long ioaddr; @@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit) break; default: - return ERR_PTR(-ENODEV); + return -ENODEV; } if (found) - return ERR_PTR(-ENODEV); + return -ENODEV; ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE); if (!ioaddr) - return ERR_PTR(-ENOMEM); + return -ENOMEM; found = 1; dev = alloc_etherdev(sizeof(struct priv)); if (!dev) goto out; - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } - dev->irq = IE_IRQ; dev->base_addr = ioaddr; err = sun3_82586_probe1(dev, ioaddr); @@ -326,8 +322,9 @@ out1: free_netdev(dev); out: iounmap((void __iomem *)ioaddr); - return ERR_PTR(err); + return err; } +module_init(sun3_82586_probe); static const struct net_device_ops sun3_82586_netdev_ops = { .ndo_open = sun3_82586_open, diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 471be6ec7e8a..664a91af662d 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -3011,7 +3011,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, @@ -3023,7 +3023,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { 
.ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 737ba85e409f..3d9b4f99d357 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1630,7 +1630,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_stop = ibmveth_close, .ndo_start_xmit = ibmveth_start_xmit, .ndo_set_rx_mode = ibmveth_set_multicast_list, - .ndo_do_ioctl = ibmveth_ioctl, + .ndo_eth_ioctl = ibmveth_ioctl, .ndo_change_mtu = ibmveth_change_mtu, .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 1b0958bd24f6..373eb027b925 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2715,10 +2715,10 @@ static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_TEST: - memcpy(data, *e100_gstrings_test, sizeof(e100_gstrings_test)); + memcpy(data, e100_gstrings_test, sizeof(e100_gstrings_test)); break; case ETH_SS_STATS: - memcpy(data, *e100_gstrings_stats, sizeof(e100_gstrings_stats)); + memcpy(data, e100_gstrings_stats, sizeof(e100_gstrings_stats)); break; } } @@ -2809,7 +2809,7 @@ static const struct net_device_ops e100_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, - .ndo_do_ioctl = e100_do_ioctl, + .ndo_eth_ioctl = e100_do_ioctl, .ndo_tx_timeout = e100_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = e100_netpoll, diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index c2a109126c27..bed4f040face 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -832,7 +832,7 @@ static const struct net_device_ops e1000_netdev_ops = { .ndo_set_mac_address = e1000_set_mac, .ndo_tx_timeout = e1000_tx_timeout, .ndo_change_mtu = e1000_change_mtu, - .ndo_do_ioctl = e1000_ioctl, + .ndo_eth_ioctl = e1000_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 06442e6bef73..7256b43b7a65 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -903,6 +903,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: mask |= BIT(18); break; default: @@ -1569,6 +1570,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index db79c4e6413e..bcf680e83811 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -98,14 +98,22 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_TGP_I219_V14 
0x15FA #define E1000_DEV_ID_PCH_TGP_I219_LM15 0x15F4 #define E1000_DEV_ID_PCH_TGP_I219_V15 0x15F5 +#define E1000_DEV_ID_PCH_RPL_I219_LM23 0x0DC5 +#define E1000_DEV_ID_PCH_RPL_I219_V23 0x0DC6 #define E1000_DEV_ID_PCH_ADP_I219_LM16 0x1A1E #define E1000_DEV_ID_PCH_ADP_I219_V16 0x1A1F #define E1000_DEV_ID_PCH_ADP_I219_LM17 0x1A1C #define E1000_DEV_ID_PCH_ADP_I219_V17 0x1A1D +#define E1000_DEV_ID_PCH_RPL_I219_LM22 0x0DC7 +#define E1000_DEV_ID_PCH_RPL_I219_V22 0x0DC8 #define E1000_DEV_ID_PCH_MTP_I219_LM18 0x550A #define E1000_DEV_ID_PCH_MTP_I219_V18 0x550B #define E1000_DEV_ID_PCH_MTP_I219_LM19 0x550C #define E1000_DEV_ID_PCH_MTP_I219_V19 0x550D +#define E1000_DEV_ID_PCH_LNP_I219_LM20 0x550E +#define E1000_DEV_ID_PCH_LNP_I219_V20 0x550F +#define E1000_DEV_ID_PCH_LNP_I219_LM21 0x5510 +#define E1000_DEV_ID_PCH_LNP_I219_V21 0x5511 #define E1000_REVISION_4 4 @@ -132,6 +140,7 @@ enum e1000_mac_type { e1000_pch_tgp, e1000_pch_adp, e1000_pch_mtp, + e1000_pch_lnp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index cf7b3887da1d..2f97c9f5611d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -321,6 +321,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -466,6 +467,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ @@ -711,6 +713,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -1266,9 +1269,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) usleep_range(10000, 11000); } if (firmware_bug) - e_warn("ULP_CONFIG_DONE took %dmsec. This is a firmware bug\n", i * 10); + e_warn("ULP_CONFIG_DONE took %d msec. 
This is a firmware bug\n", + i * 10); else - e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); + e_dbg("ULP_CONFIG_DONE cleared after %d msec\n", + i * 10); if (force) { mac_reg = er32(H2ME); @@ -1663,6 +1668,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2118,6 +2124,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3162,6 +3169,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -4101,6 +4109,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 1502895eb45d..9b145f6248a8 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -41,12 +41,15 @@ #define E1000_FWSM_WLOCK_MAC_MASK 0x0380 #define E1000_FWSM_WLOCK_MAC_SHIFT 7 #define E1000_FWSM_ULP_CFG_DONE 0x00000400 /* Low power cfg done */ +#define E1000_EXFWSM_DPG_EXIT_DONE 0x00000001 /* Shared Receive Address Registers */ #define E1000_SHRAL_PCH_LPT(_i) (0x05408 + ((_i) * 8)) #define E1000_SHRAH_PCH_LPT(_i) (0x0540C + ((_i) * 8)) #define E1000_H2ME 0x05B50 /* Host to ME */ +#define E1000_H2ME_START_DPG 0x00000001 /* indicate the ME of DPG */ +#define E1000_H2ME_EXIT_DPG 0x00000002 /* indicate the ME exit DPG */ #define E1000_H2ME_ULP 0x00000800 /* ULP Indication Bit */ #define E1000_H2ME_ENFORCE_SETTINGS 0x00001000 /* Enforce Settings */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 757a54c39eef..900b3ab998bd 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3550,6 +3550,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ incperiod = INCPERIOD_24MHZ; @@ -4068,6 +4069,7 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: fc->refresh_time = 0xFFFF; fc->pause_time = 0xFFFF; @@ -6343,42 +6345,110 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) u32 mac_data; u16 phy_data; - /* Disable the periodic inband message, - * don't request PCIe clock in K1 page770_17[10:9] = 10b - */ - e1e_rphy(hw, HV_PM_CTRL, &phy_data); - phy_data &= ~HV_PM_CTRL_K1_CLK_REQ; - phy_data |= BIT(10); - e1e_wphy(hw, HV_PM_CTRL, phy_data); + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + /* Request ME configure the device for S0ix */ + mac_data = er32(H2ME); + mac_data |= E1000_H2ME_START_DPG; + mac_data &= ~E1000_H2ME_EXIT_DPG; + ew32(H2ME, mac_data); + } else { + /* Request driver configure the device to S0ix */ + /* Disable the periodic inband message, + * don't request PCIe clock in K1 page770_17[10:9] = 10b + */ + 
e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= ~HV_PM_CTRL_K1_CLK_REQ; + phy_data |= BIT(10); + e1e_wphy(hw, HV_PM_CTRL, phy_data); - /* Make sure we don't exit K1 every time a new packet arrives - * 772_29[5] = 1 CS_Mode_Stay_In_K1 - */ - e1e_rphy(hw, I217_CGFREG, &phy_data); - phy_data |= BIT(5); - e1e_wphy(hw, I217_CGFREG, phy_data); + /* Make sure we don't exit K1 every time a new packet arrives + * 772_29[5] = 1 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data |= BIT(5); + e1e_wphy(hw, I217_CGFREG, phy_data); - /* Change the MAC/PHY interface to SMBus - * Force the SMBus in PHY page769_23[0] = 1 - * Force the SMBus in MAC CTRL_EXT[11] = 1 - */ - e1e_rphy(hw, CV_SMB_CTRL, &phy_data); - phy_data |= CV_SMB_CTRL_FORCE_SMBUS; - e1e_wphy(hw, CV_SMB_CTRL, phy_data); - mac_data = er32(CTRL_EXT); - mac_data |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_data); + /* Change the MAC/PHY interface to SMBus + * Force the SMBus in PHY page769_23[0] = 1 + * Force the SMBus in MAC CTRL_EXT[11] = 1 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); + + /* DFT control: PHY bit: page769_20[0] = 1 + * page769_20[7] - PHY PLL stop + * page769_20[8] - PHY go to the electrical idle + * page769_20[9] - PHY serdes disable + * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1 + */ + e1e_rphy(hw, I82579_DFT_CTRL, &phy_data); + phy_data |= BIT(0); + phy_data |= BIT(7); + phy_data |= BIT(8); + phy_data |= BIT(9); + e1e_wphy(hw, I82579_DFT_CTRL, phy_data); + + mac_data = er32(EXTCNF_CTRL); + mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; + ew32(EXTCNF_CTRL, mac_data); + + /* Enable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(22); + ew32(FEXTNVM7, mac_data); + + /* Disable disconnected cable conditioning for Power Gating */ + mac_data = er32(DPGFR); + mac_data |= BIT(2); + ew32(DPGFR, mac_data); + + /* Don't wake from dynamic Power Gating with clock request */ + mac_data = er32(FEXTNVM12); + mac_data |= BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data &= ~BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Enable K1 off to enable mPHY Power Gating */ + mac_data = er32(FEXTNVM6); + mac_data |= BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Enable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data |= BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Enable the Dynamic Clock Gating in the DMA and MAC */ + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; + ew32(CTRL_EXT, mac_data); + + /* No MAC DPG gating SLP_S0 in modern standby + * Switch the logic of the lanphypc to use PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data |= BIT(7); + ew32(FEXTNVM5, mac_data); + } - /* DFT control: PHY bit: page769_20[0] = 1 - * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1 - */ - e1e_rphy(hw, I82579_DFT_CTRL, &phy_data); - phy_data |= BIT(0); - e1e_wphy(hw, I82579_DFT_CTRL, phy_data); + /* Disable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(31); + mac_data &= ~BIT(0); + ew32(FEXTNVM7, mac_data); - mac_data = er32(EXTCNF_CTRL); - mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; - ew32(EXTCNF_CTRL, mac_data); + /* Dynamic Power Gating Enable */ + mac_data = er32(CTRL_EXT); + mac_data |= BIT(3); + ew32(CTRL_EXT, mac_data); /* Check MAC Tx/Rx packet buffer pointers. 
* Reset MAC Tx/Rx packet buffer pointers to suppress any @@ -6414,148 +6484,130 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) mac_data = er32(RDFPC); if (mac_data) ew32(RDFPC, 0); - - /* Enable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(22); - ew32(FEXTNVM7, mac_data); - - /* Disable the time synchronization clock */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(31); - mac_data &= ~BIT(0); - ew32(FEXTNVM7, mac_data); - - /* Dynamic Power Gating Enable */ - mac_data = er32(CTRL_EXT); - mac_data |= BIT(3); - ew32(CTRL_EXT, mac_data); - - /* Disable disconnected cable conditioning for Power Gating */ - mac_data = er32(DPGFR); - mac_data |= BIT(2); - ew32(DPGFR, mac_data); - - /* Don't wake from dynamic Power Gating with clock request */ - mac_data = er32(FEXTNVM12); - mac_data |= BIT(12); - ew32(FEXTNVM12, mac_data); - - /* Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Enable K1 off to enable mPHY Power Gating */ - mac_data = er32(FEXTNVM6); - mac_data |= BIT(31); - ew32(FEXTNVM6, mac_data); - - /* Enable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data |= BIT(9); - ew32(FEXTNVM8, mac_data); - - /* Enable the Dynamic Clock Gating in the DMA and MAC */ - mac_data = er32(CTRL_EXT); - mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; - ew32(CTRL_EXT, mac_data); - - /* No MAC DPG gating SLP_S0 in modern standby - * Switch the logic of the lanphypc to use PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data |= BIT(7); - ew32(FEXTNVM5, mac_data); } static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + bool firmware_bug = false; u32 mac_data; u16 phy_data; + u32 i = 0; + + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + /* Request ME unconfigure the device from S0ix */ + mac_data = er32(H2ME); + mac_data &= ~E1000_H2ME_START_DPG; + mac_data |= E1000_H2ME_EXIT_DPG; + ew32(H2ME, mac_data); + + /* Poll up to 2.5 seconds for ME to unconfigure DPG. + * If this takes more than 1 second, show a warning indicating a + * firmware bug + */ + while (!(er32(EXFWSM) & E1000_EXFWSM_DPG_EXIT_DONE)) { + if (i > 100 && !firmware_bug) + firmware_bug = true; - /* Disable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data &= 0xFFBFFFFF; - ew32(FEXTNVM7, mac_data); + if (i++ == 250) { + e_dbg("Timeout (firmware bug): %d msec\n", + i * 10); + break; + } - /* Enable the time synchronization clock */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(0); - ew32(FEXTNVM7, mac_data); + usleep_range(10000, 11000); + } + if (firmware_bug) + e_warn("DPG_EXIT_DONE took %d msec. 
This is a firmware bug\n", + i * 10); + else + e_dbg("DPG_EXIT_DONE cleared after %d msec\n", i * 10); + } else { + /* Request driver unconfigure the device from S0ix */ + + /* Disable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data &= 0xFFBFFFFF; + ew32(FEXTNVM7, mac_data); + + /* Disable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data &= ~BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Disable K1 off */ + mac_data = er32(FEXTNVM6); + mac_data &= ~BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Disable Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data |= BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Cancel not waking from dynamic + * Power Gating with clock request + */ + mac_data = er32(FEXTNVM12); + mac_data &= ~BIT(12); + ew32(FEXTNVM12, mac_data); - /* Disable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data &= ~BIT(9); - ew32(FEXTNVM8, mac_data); + /* Cancel disable disconnected cable conditioning + * for Power Gating + */ + mac_data = er32(DPGFR); + mac_data &= ~BIT(2); + ew32(DPGFR, mac_data); - /* Disable K1 off */ - mac_data = er32(FEXTNVM6); - mac_data &= ~BIT(31); - ew32(FEXTNVM6, mac_data); + /* Disable the Dynamic Clock Gating in the DMA and MAC */ + mac_data = er32(CTRL_EXT); + mac_data &= 0xFFF7FFFF; + ew32(CTRL_EXT, mac_data); - /* Disable Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data |= BIT(28); - ew32(FEXTNVM9, mac_data); + /* Revert the lanphypc logic to use the internal Gbe counter + * and not the PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data &= 0xFFFFFF7F; + ew32(FEXTNVM5, mac_data); - /* Cancel not waking from dynamic - * Power Gating with clock request - */ - mac_data = er32(FEXTNVM12); - mac_data &= ~BIT(12); - ew32(FEXTNVM12, mac_data); + /* Enable the periodic inband message, + * Request PCIe clock in K1 page770_17[10:9] =01b + */ + e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= 0xFBFF; + phy_data |= HV_PM_CTRL_K1_CLK_REQ; + e1e_wphy(hw, HV_PM_CTRL, phy_data); - /* Cancel disable disconnected cable conditioning - * for Power Gating - */ - mac_data = er32(DPGFR); - mac_data &= ~BIT(2); - ew32(DPGFR, mac_data); + /* Return back configuration + * 772_29[5] = 0 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data &= 0xFFDF; + e1e_wphy(hw, I217_CGFREG, phy_data); + + /* Change the MAC/PHY interface to Kumeran + * Unforce the SMBus in PHY page769_23[0] = 0 + * Unforce the SMBus in MAC CTRL_EXT[11] = 0 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); + } /* Disable Dynamic Power Gating */ mac_data = er32(CTRL_EXT); mac_data &= 0xFFFFFFF7; ew32(CTRL_EXT, mac_data); - /* Disable the Dynamic Clock Gating in the DMA and MAC */ - mac_data = er32(CTRL_EXT); - mac_data &= 0xFFF7FFFF; - ew32(CTRL_EXT, mac_data); - - /* Revert the lanphypc logic to use the internal Gbe counter - * and not the PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data &= 0xFFFFFF7F; - ew32(FEXTNVM5, mac_data); - - /* Enable the periodic inband message, - * Request PCIe clock in K1 page770_17[10:9] =01b - */ - e1e_rphy(hw, HV_PM_CTRL, &phy_data); - phy_data &= 0xFBFF; - phy_data |= HV_PM_CTRL_K1_CLK_REQ; - e1e_wphy(hw, HV_PM_CTRL, phy_data); - - /* Return back configuration - * 772_29[5] = 0 CS_Mode_Stay_In_K1 - */ - e1e_rphy(hw, I217_CGFREG, &phy_data); - phy_data &= 0xFFDF; 
- e1e_wphy(hw, I217_CGFREG, phy_data); - - /* Change the MAC/PHY interface to Kumeran - * Unforce the SMBus in PHY page769_23[0] = 0 - * Unforce the SMBus in MAC CTRL_EXT[11] = 0 - */ - e1e_rphy(hw, CV_SMB_CTRL, &phy_data); - phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS; - e1e_wphy(hw, CV_SMB_CTRL, phy_data); - mac_data = er32(CTRL_EXT); - mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_data); + /* Enable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data &= ~BIT(31); + mac_data |= BIT(0); + ew32(FEXTNVM7, mac_data); } static int e1000e_pm_freeze(struct device *dev) @@ -7302,7 +7354,7 @@ static const struct net_device_ops e1000e_netdev_ops = { .ndo_set_rx_mode = e1000e_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, .ndo_change_mtu = e1000_change_mtu, - .ndo_do_ioctl = e1000_ioctl, + .ndo_eth_ioctl = e1000_ioctl, .ndo_tx_timeout = e1000_tx_timeout, .ndo_validate_addr = eth_validate_addr, @@ -7677,7 +7729,7 @@ err_dma: * @pdev: PCI device information struct * * e1000_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a + * that it should release a PCI device. This could be caused by a * Hot-Plug event, or because the driver is going to be removed from * memory. **/ @@ -7850,14 +7902,22 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_cnp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 9e79d672f4f1..eb5c014c02fb 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -298,6 +298,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 8165ba2619a4..6c0cd8cab3ef 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h @@ -213,6 +213,7 @@ #define E1000_FACTPS 
0x05B30 /* Function Active and Power State to MNG */ #define E1000_SWSM 0x05B50 /* SW Semaphore */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ +#define E1000_EXFWSM 0x05B58 /* Extended FW Semaphore */ /* Driver-only SW semaphore (not used by BOOT agents) */ #define E1000_SWSM2 0x05B58 #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b9417dc0007c..39fb3d57c057 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -428,6 +428,8 @@ struct i40e_channel { struct i40e_vsi *parent_vsi; }; +struct i40e_ptp_pins_settings; + static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) { return !!ch->fwd; @@ -644,12 +646,83 @@ struct i40e_pf { struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */ struct i40e_dcbx_config tmp_cfg; +/* GPIO defines used by PTP */ +#define I40E_SDP3_2 18 +#define I40E_SDP3_3 19 +#define I40E_GPIO_4 20 +#define I40E_LED2_0 26 +#define I40E_LED2_1 27 +#define I40E_LED3_0 28 +#define I40E_LED3_1 29 +#define I40E_GLGEN_GPIO_SET_SDP_DATA_HI \ + (1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT) +#define I40E_GLGEN_GPIO_SET_DRV_SDP_DATA \ + (1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_0 \ + (0 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_1 \ + (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) +#define I40E_GLGEN_GPIO_CTL_RESERVED BIT(2) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z \ + (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT) +#define I40E_GLGEN_GPIO_CTL_DIR_OUT \ + (1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TRI_DRV_HI \ + (1 << I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_HI_RST \ + (1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TIMESYNC_0 \ + (3 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TIMESYNC_1 \ + (4 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) +#define I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN \ + (0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT \ + (1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0) +#define I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1) +#define I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0) +#define I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1) +#define I40E_GLGEN_GPIO_CTL_LED_INIT \ + (I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z | \ + I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | \ + I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_OUT_DEFAULT | \ + I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN) +#define I40E_PRTTSYN_AUX_1_INSTNT \ + (1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT) +#define 
I40E_PRTTSYN_AUX_0_OUT_ENABLE \ + (1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT) +#define I40E_PRTTSYN_AUX_0_OUT_CLK_MOD (3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT) +#define I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD \ + (I40E_PRTTSYN_AUX_0_OUT_ENABLE | I40E_PRTTSYN_AUX_0_OUT_CLK_MOD) +#define I40E_PTP_HALF_SECOND 500000000LL /* nano seconds */ +#define I40E_PTP_2_SEC_DELAY 2 + struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct sk_buff *ptp_tx_skb; unsigned long ptp_tx_start; struct hwtstamp_config tstamp_config; struct timespec64 ptp_prev_hw_time; + struct work_struct ptp_pps_work; + struct work_struct ptp_extts0_work; + struct work_struct ptp_extts1_work; ktime_t ptp_reset_start; struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */ u32 ptp_adj_mult; @@ -657,10 +730,14 @@ struct i40e_pf { u32 tx_hwtstamp_skipped; u32 rx_hwtstamp_cleared; u32 latch_event_flags; + u64 ptp_pps_start; + u32 pps_delay; spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */ + struct ptp_pin_desc ptp_pin[3]; unsigned long latch_events[4]; bool ptp_tx; bool ptp_rx; + struct i40e_ptp_pins_settings *ptp_pins; u16 rss_table_size; /* HW RSS table size */ u32 max_bw; u32 min_bw; @@ -1169,6 +1246,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf); void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); +int i40e_ptp_alloc_pins(struct i40e_pf *pf); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1d1f52756a93..97c78551395b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4079,10 +4079,13 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) { u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0); - if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) { - icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; + if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK) + schedule_work(&pf->ptp_extts0_work); + + if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) i40e_ptp_tx_hwtstamp(pf); - } + + icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; } /* If a critical error is pending we have no choice but to reset the @@ -13265,7 +13268,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = i40e_set_mac, .ndo_change_mtu = i40e_change_mtu, - .ndo_do_ioctl = i40e_ioctl, + .ndo_eth_ioctl = i40e_ioctl, .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, @@ -15181,6 +15184,22 @@ err_switch_setup: } /** + * i40e_set_subsystem_device_id - set subsystem device id + * @hw: pointer to the hardware info + * + * Set PCI subsystem device id either from a pci_dev structure or + * a specific FW register. + **/ +static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw) +{ + struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev; + + hw->subsystem_device_id = pdev->subsystem_device ? 
+ pdev->subsystem_device : + (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX); +} + +/** * i40e_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in i40e_pci_tbl @@ -15275,7 +15294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->device_id = pdev->device; pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; + i40e_set_subsystem_device_id(hw); hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); hw->bus.bus_id = pdev->bus->number; @@ -15455,6 +15474,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (is_valid_ether_addr(hw->mac.port_addr)) pf->hw_features |= I40E_HW_PORT_ID_VALID; + i40e_ptp_alloc_pins(pf); pci_set_drvdata(pdev, pf); pci_save_state(pdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 7b971b205d36..09b1d5aed1c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -3,6 +3,7 @@ #include "i40e.h" #include <linux/ptp_classify.h> +#include <linux/posix-clock.h> /* The XL710 timesync is very much like Intel's 82599 design when it comes to * the fundamental clock design. However, the clock operations are much simpler @@ -20,10 +21,252 @@ #define I40E_PTP_10GB_INCVAL_MULT 2 #define I40E_PTP_5GB_INCVAL_MULT 2 #define I40E_PTP_1GB_INCVAL_MULT 20 +#define I40E_ISGN 0x80000000 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (2 << \ I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) +#define I40E_SUBDEV_ID_25G_PTP_PIN 0xB +#define to_dev(obj) container_of(obj, struct device, kobj) + +enum i40e_ptp_pin { + SDP3_2 = 0, + SDP3_3, + GPIO_4 +}; + +enum i40e_can_set_pins_t { + CANT_DO_PINS = -1, + CAN_SET_PINS, + CAN_DO_PINS +}; + +static struct ptp_pin_desc sdp_desc[] = { + /* name idx func chan */ + {"SDP3_2", SDP3_2, PTP_PF_NONE, 0}, + {"SDP3_3", SDP3_3, PTP_PF_NONE, 1}, + {"GPIO_4", GPIO_4, PTP_PF_NONE, 1}, +}; + +enum i40e_ptp_gpio_pin_state { + end = -2, + invalid, + off, + in_A, + in_B, + out_A, + out_B, +}; + +static const char * const i40e_ptp_gpio_pin_state2str[] = { + "off", "in_A", "in_B", "out_A", "out_B" +}; + +enum i40e_ptp_led_pin_state { + led_end = -2, + low = 0, + high, +}; + +struct i40e_ptp_pins_settings { + enum i40e_ptp_gpio_pin_state sdp3_2; + enum i40e_ptp_gpio_pin_state sdp3_3; + enum i40e_ptp_gpio_pin_state gpio_4; + enum i40e_ptp_led_pin_state led2_0; + enum i40e_ptp_led_pin_state led2_1; + enum i40e_ptp_led_pin_state led3_0; + enum i40e_ptp_led_pin_state led3_1; +}; + +static const struct i40e_ptp_pins_settings + i40e_ptp_pin_led_allowed_states[] = { + {off, off, off, high, high, high, high}, + {off, in_A, off, high, high, high, low}, + {off, out_A, off, high, low, high, high}, + {off, in_B, off, high, high, high, low}, + {off, out_B, off, high, low, high, high}, + {in_A, off, off, high, high, high, low}, + {in_A, in_B, off, high, high, high, low}, + {in_A, out_B, off, high, low, high, high}, + {out_A, off, off, high, low, high, high}, + {out_A, in_B, off, high, low, high, high}, + {in_B, off, off, high, high, high, low}, + {in_B, in_A, off, high, high, high, low}, + {in_B, out_A, off, high, low, high, high}, + {out_B, off, off, high, low, high, high}, + {out_B, in_A, off, high, low, high, high}, + {off, off, in_A, high, high, low, high}, + {off, out_A, 
in_A, high, low, low, high}, + {off, in_B, in_A, high, high, low, low}, + {off, out_B, in_A, high, low, low, high}, + {out_A, off, in_A, high, low, low, high}, + {out_A, in_B, in_A, high, low, low, high}, + {in_B, off, in_A, high, high, low, low}, + {in_B, out_A, in_A, high, low, low, high}, + {out_B, off, in_A, high, low, low, high}, + {off, off, out_A, low, high, high, high}, + {off, in_A, out_A, low, high, high, low}, + {off, in_B, out_A, low, high, high, low}, + {off, out_B, out_A, low, low, high, high}, + {in_A, off, out_A, low, high, high, low}, + {in_A, in_B, out_A, low, high, high, low}, + {in_A, out_B, out_A, low, low, high, high}, + {in_B, off, out_A, low, high, high, low}, + {in_B, in_A, out_A, low, high, high, low}, + {out_B, off, out_A, low, low, high, high}, + {out_B, in_A, out_A, low, low, high, high}, + {off, off, in_B, high, high, low, high}, + {off, in_A, in_B, high, high, low, low}, + {off, out_A, in_B, high, low, low, high}, + {off, out_B, in_B, high, low, low, high}, + {in_A, off, in_B, high, high, low, low}, + {in_A, out_B, in_B, high, low, low, high}, + {out_A, off, in_B, high, low, low, high}, + {out_B, off, in_B, high, low, low, high}, + {out_B, in_A, in_B, high, low, low, high}, + {off, off, out_B, low, high, high, high}, + {off, in_A, out_B, low, high, high, low}, + {off, out_A, out_B, low, low, high, high}, + {off, in_B, out_B, low, high, high, low}, + {in_A, off, out_B, low, high, high, low}, + {in_A, in_B, out_B, low, high, high, low}, + {out_A, off, out_B, low, low, high, high}, + {out_A, in_B, out_B, low, low, high, high}, + {in_B, off, out_B, low, high, high, low}, + {in_B, in_A, out_B, low, high, high, low}, + {in_B, out_A, out_B, low, low, high, high}, + {end, end, end, led_end, led_end, led_end, led_end} +}; + +static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins); + +/** + * i40e_ptp_extts0_work - workqueue task function + * @work: workqueue task structure + * + * Service for PTP external clock event + **/ +static void i40e_ptp_extts0_work(struct work_struct *work) +{ + struct i40e_pf *pf = container_of(work, struct i40e_pf, + ptp_extts0_work); + struct i40e_hw *hw = &pf->hw; + struct ptp_clock_event event; + u32 hi, lo; + + /* Event time is captured by one of the two matched registers + * PRTTSYN_EVNT_L: 32 LSB of sampled time event + * PRTTSYN_EVNT_H: 32 MSB of sampled time event + * Event is defined in PRTTSYN_EVNT_0 register + */ + lo = rd32(hw, I40E_PRTTSYN_EVNT_L(0)); + hi = rd32(hw, I40E_PRTTSYN_EVNT_H(0)); + + event.timestamp = (((u64)hi) << 32) | lo; + + event.type = PTP_CLOCK_EXTTS; + event.index = hw->pf_id; + + /* fire event */ + ptp_clock_event(pf->ptp_clock, &event); +} + +/** + * i40e_is_ptp_pin_dev - check if device supports PTP pins + * @hw: pointer to the hardware structure + * + * Return true if device supports PTP pins, false otherwise. + **/ +static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) +{ + return hw->device_id == I40E_DEV_ID_25G_SFP28 && + hw->subsystem_device_id == I40E_SUBDEV_ID_25G_PTP_PIN; +} + +/** + * i40e_can_set_pins - check possibility of manipulating the pins + * @pf: board private structure + * + * Check if all conditions are satisfied to manipulate PTP pins. + * Return CAN_SET_PINS if pins can be set on a specific PF or + * return CAN_DO_PINS if pins can be manipulated within a NIC or + * return CANT_DO_PINS otherwise. 
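+ *
+ * Note: the SDP/GPIO pins are a per-NIC resource rather than a
+ * per-port one, so only PF0 actually programs them. Other PFs get
+ * CAN_DO_PINS so that callers can report success to user-space
+ * tools without touching the hardware.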
+ **/ +static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +{ + if (!i40e_is_ptp_pin_dev(&pf->hw)) { + dev_warn(&pf->pdev->dev, + "PTP external clock not supported.\n"); + return CANT_DO_PINS; + } + + if (!pf->ptp_pins) { + dev_warn(&pf->pdev->dev, + "PTP PIN manipulation not allowed.\n"); + return CANT_DO_PINS; + } + + if (pf->hw.pf_id) { + dev_warn(&pf->pdev->dev, + "PTP PINs should be accessed via PF0.\n"); + return CAN_DO_PINS; + } + + return CAN_SET_PINS; +} + +/** + * i40_ptp_reset_timing_events - Reset PTP timing events + * @pf: Board private structure + * + * This function resets timing events for pf. + **/ +static void i40_ptp_reset_timing_events(struct i40e_pf *pf) +{ + u32 i; + + spin_lock_bh(&pf->ptp_rx_lock); + for (i = 0; i <= I40E_PRTTSYN_RXTIME_L_MAX_INDEX; i++) { + /* reading and automatically clearing timing events registers */ + rd32(&pf->hw, I40E_PRTTSYN_RXTIME_L(i)); + rd32(&pf->hw, I40E_PRTTSYN_RXTIME_H(i)); + pf->latch_events[i] = 0; + } + /* reading and automatically clearing timing events registers */ + rd32(&pf->hw, I40E_PRTTSYN_TXTIME_L); + rd32(&pf->hw, I40E_PRTTSYN_TXTIME_H); + + pf->tx_hwtstamp_timeouts = 0; + pf->tx_hwtstamp_skipped = 0; + pf->rx_hwtstamp_cleared = 0; + pf->latch_event_flags = 0; + spin_unlock_bh(&pf->ptp_rx_lock); +} + +/** + * i40e_ptp_verify - check pins + * @ptp: ptp clock + * @pin: pin index + * @func: assigned function + * @chan: channel + * + * Check pins consistency. + * Return 0 on success or error on failure. + **/ +static int i40e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + return 0; +} /** * i40e_ptp_read - Read the PHC time from the device @@ -137,6 +380,37 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** + * i40e_ptp_set_1pps_signal_hw - configure 1PPS PTP signal for pins + * @pf: the PF private data structure + * + * Configure 1PPS signal used for PTP pins + **/ +static void i40e_ptp_set_1pps_signal_hw(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + struct timespec64 now; + u64 ns; + + wr32(hw, I40E_PRTTSYN_AUX_0(1), 0); + wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT); + wr32(hw, I40E_PRTTSYN_AUX_0(1), I40E_PRTTSYN_AUX_0_OUT_ENABLE); + + i40e_ptp_read(pf, &now, NULL); + now.tv_sec += I40E_PTP_2_SEC_DELAY; + now.tv_nsec = 0; + ns = timespec64_to_ns(&now); + + /* I40E_PRTTSYN_TGT_L(1) */ + wr32(hw, I40E_PRTTSYN_TGT_L(1), ns & 0xFFFFFFFF); + /* I40E_PRTTSYN_TGT_H(1) */ + wr32(hw, I40E_PRTTSYN_TGT_H(1), ns >> 32); + wr32(hw, I40E_PRTTSYN_CLKO(1), I40E_PTP_HALF_SECOND); + wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT); + wr32(hw, I40E_PRTTSYN_AUX_0(1), + I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD); +} + +/** * i40e_ptp_adjtime - Adjust the PHC time * @ptp: The PTP clock structure * @delta: Offset in nanoseconds to adjust the PHC time by @@ -146,14 +420,35 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now, then; + struct i40e_hw *hw = &pf->hw; - then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); - i40e_ptp_read(pf, &now, NULL); - now = timespec64_add(now, then); - i40e_ptp_write(pf, (const struct timespec64 *)&now); + if (delta > -999999900LL && delta < 999999900LL) { + int 
neg_adj = 0; + u32 timadj; + u64 tohw; + + if (delta < 0) { + neg_adj = 1; + tohw = -delta; + } else { + tohw = delta; + } + + timadj = tohw & 0x3FFFFFFF; + if (neg_adj) + timadj |= I40E_ISGN; + wr32(hw, I40E_PRTTSYN_ADJ, timadj); + } else { + struct timespec64 then, now; + + then = ns_to_timespec64(delta); + i40e_ptp_read(pf, &now, NULL); + now = timespec64_add(now, then); + i40e_ptp_write(pf, (const struct timespec64 *)&now); + i40e_ptp_set_1pps_signal_hw(pf); + } mutex_unlock(&pf->tmreg_lock); @@ -184,7 +479,7 @@ static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, /** * i40e_ptp_settime - Set the time of the PHC * @ptp: The PTP clock structure - * @ts: timespec structure that holds the new time value + * @ts: timespec64 structure that holds the new time value * * Set the device clock to the user input value. The conversion from timespec * to ns happens in the write function. @@ -202,18 +497,145 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp, } /** - * i40e_ptp_feature_enable - Enable/disable ancillary features of the PHC subsystem + * i40e_pps_configure - configure PPS events + * @ptp: ptp clock + * @rq: clock request + * @on: status + * + * Configure PPS events for external clock source. + * Return 0 on success or error on failure. + **/ +static int i40e_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); + + if (!!on) + i40e_ptp_set_1pps_signal_hw(pf); + + return 0; +} + +/** + * i40e_pin_state - determine PIN state + * @index: PIN index + * @func: function assigned to PIN + * + * Determine PIN state based on PIN index and function assigned. + * Return PIN state. + **/ +static enum i40e_ptp_gpio_pin_state i40e_pin_state(int index, int func) +{ + enum i40e_ptp_gpio_pin_state state = off; + + if (index == 0 && func == PTP_PF_EXTTS) + state = in_A; + if (index == 1 && func == PTP_PF_EXTTS) + state = in_B; + if (index == 0 && func == PTP_PF_PEROUT) + state = out_A; + if (index == 1 && func == PTP_PF_PEROUT) + state = out_B; + + return state; +} + +/** + * i40e_ptp_enable_pin - enable PINs. + * @pf: private board structure + * @chan: channel + * @func: PIN function + * @on: state + * + * Enable PTP pins for external clock source. + * Return 0 on success or error code on failure. + **/ +static int i40e_ptp_enable_pin(struct i40e_pf *pf, unsigned int chan, + enum ptp_pin_function func, int on) +{ + enum i40e_ptp_gpio_pin_state *pin = NULL; + struct i40e_ptp_pins_settings pins; + int pin_index; + + /* Use PF0 to set pins. Return success for user space tools */ + if (pf->hw.pf_id) + return 0; + + /* Preserve previous state of pins that we don't touch */ + pins.sdp3_2 = pf->ptp_pins->sdp3_2; + pins.sdp3_3 = pf->ptp_pins->sdp3_3; + pins.gpio_4 = pf->ptp_pins->gpio_4; + + /* To turn on the pin - find the corresponding one based on + * the given index. To turn the function off - find + * which pin had it assigned. Don't use ptp_find_pin here + * because it tries to lock the pincfg_mux which is locked by + * ptp_pin_store() that calls here. 
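+ * In practice the function-off path below simply resets all three
+ * pins to "off" rather than looking up the previous owner.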
+ */ + if (on) { + pin_index = ptp_find_pin(pf->ptp_clock, func, chan); + if (pin_index < 0) + return -EBUSY; + + switch (pin_index) { + case SDP3_2: + pin = &pins.sdp3_2; + break; + case SDP3_3: + pin = &pins.sdp3_3; + break; + case GPIO_4: + pin = &pins.gpio_4; + break; + default: + return -EINVAL; + } + + *pin = i40e_pin_state(chan, func); + } else { + pins.sdp3_2 = off; + pins.sdp3_3 = off; + pins.gpio_4 = off; + } + + return i40e_ptp_set_pins(pf, &pins) ? -EINVAL : 0; +} + +/** + * i40e_ptp_feature_enable - Enable external clock pins * @ptp: The PTP clock structure - * @rq: The requested feature to change - * @on: Enable/disable flag + * @rq: The PTP clock request structure + * @on: To turn feature on/off * - * The XL710 does not support any of the ancillary features of the PHC - * subsystem, so this function may just return. + * Setting on/off PTP PPS feature for pin. **/ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) + struct ptp_clock_request *rq, + int on) { - return -EOPNOTSUPP; + struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); + + enum ptp_pin_function func; + unsigned int chan; + + /* TODO: Implement flags handling for EXTTS and PEROUT */ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + func = PTP_PF_EXTTS; + chan = rq->extts.index; + break; + case PTP_CLK_REQ_PEROUT: + func = PTP_PF_PEROUT; + chan = rq->perout.index; + break; + case PTP_CLK_REQ_PPS: + return i40e_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + + return i40e_ptp_enable_pin(pf, chan, func, on); } /** @@ -528,6 +950,229 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) } /** + * i40e_ptp_free_pins - free memory used by PTP pins + * @pf: Board private structure + * + * Release memory allocated for PTP pins. + **/ +static void i40e_ptp_free_pins(struct i40e_pf *pf) +{ + if (i40e_is_ptp_pin_dev(&pf->hw)) { + kfree(pf->ptp_pins); + kfree(pf->ptp_caps.pin_config); + pf->ptp_pins = NULL; + } +} + +/** + * i40e_ptp_set_pin_hw - Set HW GPIO pin + * @hw: pointer to the hardware structure + * @pin: pin index + * @state: pin state + * + * Set status of GPIO pin for external clock handling. + **/ +static void i40e_ptp_set_pin_hw(struct i40e_hw *hw, + unsigned int pin, + enum i40e_ptp_gpio_pin_state state) +{ + switch (state) { + case off: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), 0); + break; + case in_A: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0); + break; + case in_B: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0); + break; + case out_A: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1); + break; + case out_B: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1); + break; + default: + break; + } +} + +/** + * i40e_ptp_set_led_hw - Set HW GPIO led + * @hw: pointer to the hardware structure + * @led: led index + * @state: led state + * + * Set status of GPIO led for external clock handling. 
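+ *
+ * The level is driven through I40E_GLGEN_GPIO_SET: DRV_SDP_DATA
+ * puts the pin under driver control, and SDP_DATA_HI selects a
+ * high output level.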
+ **/ +static void i40e_ptp_set_led_hw(struct i40e_hw *hw, + unsigned int led, + enum i40e_ptp_led_pin_state state) +{ + switch (state) { + case low: + wr32(hw, I40E_GLGEN_GPIO_SET, + I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | led); + break; + case high: + wr32(hw, I40E_GLGEN_GPIO_SET, + I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | + I40E_GLGEN_GPIO_SET_SDP_DATA_HI | led); + break; + default: + break; + } +} + +/** + * i40e_ptp_init_leds_hw - init LEDs + * @hw: pointer to a hardware structure + * + * Set initial state of LEDs + **/ +static void i40e_ptp_init_leds_hw(struct i40e_hw *hw) +{ + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_0), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_1), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_0), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_1), + I40E_GLGEN_GPIO_CTL_LED_INIT); +} + +/** + * i40e_ptp_set_pins_hw - Set HW GPIO pins + * @pf: Board private structure + * + * This function sets GPIO pins for PTP + **/ +static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) +{ + const struct i40e_ptp_pins_settings *pins = pf->ptp_pins; + struct i40e_hw *hw = &pf->hw; + + /* pin must be disabled before it may be used */ + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off); + + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, pins->sdp3_2); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, pins->sdp3_3); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, pins->gpio_4); + + i40e_ptp_set_led_hw(hw, I40E_LED2_0, pins->led2_0); + i40e_ptp_set_led_hw(hw, I40E_LED2_1, pins->led2_1); + i40e_ptp_set_led_hw(hw, I40E_LED3_0, pins->led3_0); + i40e_ptp_set_led_hw(hw, I40E_LED3_1, pins->led3_1); + + dev_info(&pf->pdev->dev, + "PTP configuration set to: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n", + i40e_ptp_gpio_pin_state2str[pins->sdp3_2], + i40e_ptp_gpio_pin_state2str[pins->sdp3_3], + i40e_ptp_gpio_pin_state2str[pins->gpio_4]); +} + +/** + * i40e_ptp_set_pins - set PTP pins in HW + * @pf: Board private structure + * @pins: PTP pins to be applied + * + * Validate and set PTP pins in HW for specific PF. + * Return 0 on success or negative value on error. 
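+ *
+ * The requested SDP/GPIO combination is validated against the
+ * i40e_ptp_pin_led_allowed_states table; the LED levels are taken
+ * from the matching row, and -EPERM is returned when no row matches.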
+ **/ +static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins) +{ + enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + int i = 0; + + if (pin_caps == CANT_DO_PINS) + return -EOPNOTSUPP; + else if (pin_caps == CAN_DO_PINS) + return 0; + + if (pins->sdp3_2 == invalid) + pins->sdp3_2 = pf->ptp_pins->sdp3_2; + if (pins->sdp3_3 == invalid) + pins->sdp3_3 = pf->ptp_pins->sdp3_3; + if (pins->gpio_4 == invalid) + pins->gpio_4 = pf->ptp_pins->gpio_4; + while (i40e_ptp_pin_led_allowed_states[i].sdp3_2 != end) { + if (pins->sdp3_2 == i40e_ptp_pin_led_allowed_states[i].sdp3_2 && + pins->sdp3_3 == i40e_ptp_pin_led_allowed_states[i].sdp3_3 && + pins->gpio_4 == i40e_ptp_pin_led_allowed_states[i].gpio_4) { + pins->led2_0 = + i40e_ptp_pin_led_allowed_states[i].led2_0; + pins->led2_1 = + i40e_ptp_pin_led_allowed_states[i].led2_1; + pins->led3_0 = + i40e_ptp_pin_led_allowed_states[i].led3_0; + pins->led3_1 = + i40e_ptp_pin_led_allowed_states[i].led3_1; + break; + } + i++; + } + if (i40e_ptp_pin_led_allowed_states[i].sdp3_2 == end) { + dev_warn(&pf->pdev->dev, + "Unsupported PTP pin configuration: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n", + i40e_ptp_gpio_pin_state2str[pins->sdp3_2], + i40e_ptp_gpio_pin_state2str[pins->sdp3_3], + i40e_ptp_gpio_pin_state2str[pins->gpio_4]); + + return -EPERM; + } + memcpy(pf->ptp_pins, pins, sizeof(*pins)); + i40e_ptp_set_pins_hw(pf); + i40_ptp_reset_timing_events(pf); + + return 0; +} + +/** + * i40e_ptp_alloc_pins - allocate PTP pins structure + * @pf: Board private structure + * + * allocate PTP pins structure + **/ +int i40e_ptp_alloc_pins(struct i40e_pf *pf) +{ + if (!i40e_is_ptp_pin_dev(&pf->hw)) + return 0; + + pf->ptp_pins = + kzalloc(sizeof(struct i40e_ptp_pins_settings), GFP_KERNEL); + + if (!pf->ptp_pins) { + dev_warn(&pf->pdev->dev, "Cannot allocate memory for PTP pins structure.\n"); + return -I40E_ERR_NO_MEMORY; + } + + pf->ptp_pins->sdp3_2 = off; + pf->ptp_pins->sdp3_3 = off; + pf->ptp_pins->gpio_4 = off; + pf->ptp_pins->led2_0 = high; + pf->ptp_pins->led2_1 = high; + pf->ptp_pins->led3_0 = high; + pf->ptp_pins->led3_1 = high; + + /* Use PF0 to set pins in HW. Return success for user space tools */ + if (pf->hw.pf_id) + return 0; + + i40e_ptp_init_leds_hw(&pf->hw); + i40e_ptp_set_pins_hw(pf); + + return 0; +} + +/** * i40e_ptp_set_timestamp_mode - setup hardware for requested timestamp mode * @pf: Board private structure * @config: hwtstamp settings requested or saved @@ -545,6 +1190,21 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, struct i40e_hw *hw = &pf->hw; u32 tsyntype, regval; + /* Selects external trigger to cause event */ + regval = rd32(hw, I40E_PRTTSYN_AUX_0(0)); + /* Bits 17:16 are EVNTLVL, 01B rising edge */ + regval &= 0; + regval |= (1 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT); + /* regval: 0001 0000 0000 0000 0000 */ + wr32(hw, I40E_PRTTSYN_AUX_0(0), regval); + + /* Enable interrupts */ + regval = rd32(hw, I40E_PRTTSYN_CTL0); + regval |= 1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT; + wr32(hw, I40E_PRTTSYN_CTL0, regval); + + INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work); + /* Reserved for future extensions. */ if (config->flags) return -EINVAL; @@ -688,6 +1348,45 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) } /** + * i40e_init_pin_config - initialize pins. + * @pf: private board structure + * + * Initialize pins for external clock source. + * Return 0 on success or error code on failure. 
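+ *
+ * Registers the three programmable pins (SDP3_2, SDP3_3, GPIO_4)
+ * with the PTP core and advertises two external timestamp
+ * channels, two periodic outputs and PPS support.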
+ **/ +static int i40e_init_pin_config(struct i40e_pf *pf) +{ + int i; + + pf->ptp_caps.n_pins = 3; + pf->ptp_caps.n_ext_ts = 2; + pf->ptp_caps.pps = 1; + pf->ptp_caps.n_per_out = 2; + + pf->ptp_caps.pin_config = kcalloc(pf->ptp_caps.n_pins, + sizeof(*pf->ptp_caps.pin_config), + GFP_KERNEL); + if (!pf->ptp_caps.pin_config) + return -ENOMEM; + + for (i = 0; i < pf->ptp_caps.n_pins; i++) { + snprintf(pf->ptp_caps.pin_config[i].name, + sizeof(pf->ptp_caps.pin_config[i].name), + "%s", sdp_desc[i].name); + pf->ptp_caps.pin_config[i].index = sdp_desc[i].index; + pf->ptp_caps.pin_config[i].func = PTP_PF_NONE; + pf->ptp_caps.pin_config[i].chan = sdp_desc[i].chan; + } + + pf->ptp_caps.verify = i40e_ptp_verify; + pf->ptp_caps.enable = i40e_ptp_feature_enable; + + pf->ptp_caps.pps = 1; + + return 0; +} + +/** * i40e_ptp_create_clock - Create PTP clock device for userspace * @pf: Board private structure * @@ -707,13 +1406,16 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) sizeof(pf->ptp_caps.name) - 1); pf->ptp_caps.owner = THIS_MODULE; pf->ptp_caps.max_adj = 999999999; - pf->ptp_caps.n_ext_ts = 0; - pf->ptp_caps.pps = 0; pf->ptp_caps.adjfreq = i40e_ptp_adjfreq; pf->ptp_caps.adjtime = i40e_ptp_adjtime; pf->ptp_caps.gettimex64 = i40e_ptp_gettimex; pf->ptp_caps.settime64 = i40e_ptp_settime; - pf->ptp_caps.enable = i40e_ptp_feature_enable; + if (i40e_is_ptp_pin_dev(&pf->hw)) { + int err = i40e_init_pin_config(pf); + + if (err) + return err; + } /* Attempt to register the clock before enabling the hardware. */ pf->ptp_clock = ptp_clock_register(&pf->ptp_caps, &pf->pdev->dev); @@ -843,6 +1545,8 @@ void i40e_ptp_init(struct i40e_pf *pf) /* Restore the clock time based on last known value */ i40e_ptp_restore_hw_time(pf); } + + i40e_ptp_set_1pps_signal_hw(pf); } /** @@ -854,6 +1558,9 @@ void i40e_ptp_init(struct i40e_pf *pf) **/ void i40e_ptp_stop(struct i40e_pf *pf) { + struct i40e_hw *hw = &pf->hw; + u32 regval; + pf->flags &= ~I40E_FLAG_PTP; pf->ptp_tx = false; pf->ptp_rx = false; @@ -872,4 +1579,21 @@ void i40e_ptp_stop(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__, pf->vsi[pf->lan_vsi]->netdev->name); } + + if (i40e_is_ptp_pin_dev(&pf->hw)) { + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off); + } + + regval = rd32(hw, I40E_PRTTSYN_AUX_0(0)); + regval &= ~I40E_PRTTSYN_AUX_0_PTPFLAG_MASK; + wr32(hw, I40E_PRTTSYN_AUX_0(0), regval); + + /* Disable interrupts */ + regval = rd32(hw, I40E_PRTTSYN_CTL0); + regval &= ~I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK; + wr32(hw, I40E_PRTTSYN_CTL0, regval); + + i40e_ptp_free_pins(pf); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 36f7b27a04ae..8d0588a27a05 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -182,11 +182,20 @@ #define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3 #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4 +#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5 +#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) #define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11 #define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12 
#define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT 19 +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20 +#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */ +#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5 +#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6 #define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */ #define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */ #define I40E_GLGEN_MSCA_MDIADD_SHIFT 0 @@ -540,6 +549,7 @@ #define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12 #define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */ #define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */ +#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */ #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30 #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT) #define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31 @@ -742,6 +752,8 @@ #define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */ #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1 #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT) +#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2 +#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT) #define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8 #define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT) #define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31 @@ -760,7 +772,10 @@ #define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */ #define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */ +#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3 #define I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */ +#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0 +#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT) #define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4 #define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT) #define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */ @@ -768,6 +783,20 @@ #define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */ #define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */ #define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */ +#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0 +#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1 +#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16 +#define I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT 17 +#define I40E_PRTTSYN_AUX_0_PTPFLAG_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT) +#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0 +#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */ 
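The SDP pin table and PRTTSYN event registers added above are what the i40e PTP pin support exposes to userspace through the PTP character device. A minimal userspace sketch of exercising them via the standard PTP ioctls from linux/ptp_clock.h (the /dev/ptp0 path and pin index 0 are illustrative assumptions; the driver's verify callback decides what each pin may actually do):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int main(void)
{
	struct ptp_extts_request req;
	struct ptp_pin_desc pin;
	int fd = open("/dev/ptp0", O_RDWR);	/* assumed PHC node for this port */

	if (fd < 0)
		return 1;

	/* route pin 0 to the external timestamp function */
	memset(&pin, 0, sizeof(pin));
	pin.index = 0;
	pin.func = PTP_PF_EXTTS;
	if (ioctl(fd, PTP_PIN_SETFUNC, &pin))
		return 1;

	/* arm rising-edge events on EXTTS channel 0 */
	memset(&req, 0, sizeof(req));
	req.index = 0;
	req.flags = PTP_ENABLE_FEATURE | PTP_RISING_EDGE;
	return ioctl(fd, PTP_EXTTS_REQUEST, &req) ? 1 : 0;
}

Timestamps the hardware latches into I40E_PRTTSYN_EVNT_L/H would then surface as struct ptp_extts_event records read() back from the same descriptor.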
#define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */ #define I40E_GL_MDET_RX_FUNCTION_SHIFT 0 #define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index eff0a30790dd..472f56b360b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1160,12 +1160,12 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) } /** - * i40e_getnum_vf_vsi_vlan_filters + * __i40e_getnum_vf_vsi_vlan_filters * @vsi: pointer to the vsi * * called to get the number of VLANs offloaded on this VF **/ -static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +static int __i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; u16 num_vlans = 0, bkt; @@ -1179,6 +1179,23 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) } /** + * i40e_getnum_vf_vsi_vlan_filters + * @vsi: pointer to the vsi + * + * wrapper for __i40e_getnum_vf_vsi_vlan_filters() with spinlock held + **/ +static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +{ + int num_vlans; + + spin_lock_bh(&vsi->mac_filter_hash_lock); + num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + return num_vlans; +} + +/** * i40e_get_vlan_list_sync * @vsi: pointer to the VSI * @num_vlans: number of VLANs in mac_filter_hash, returned to caller @@ -1195,7 +1212,7 @@ static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans, int bkt; spin_lock_bh(&vsi->mac_filter_hash_lock); - *num_vlans = i40e_getnum_vf_vsi_vlan_filters(vsi); + *num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi); *vlan_list = kcalloc(*num_vlans, sizeof(**vlan_list), GFP_ATOMIC); if (!(*vlan_list)) goto err; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 44bafedd09f2..fa6cf20da911 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -132,6 +132,30 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, } /** + * iavf_lock_timeout - try to set bit but give up after timeout + * @adapter: board private structure + * @bit: bit to set + * @msecs: timeout in msecs + * + * Returns 0 on success, negative on failure + **/ +static int iavf_lock_timeout(struct iavf_adapter *adapter, + enum iavf_critical_section_t bit, + unsigned int msecs) +{ + unsigned int wait, delay = 10; + + for (wait = 0; wait < msecs; wait += delay) { + if (!test_and_set_bit(bit, &adapter->crit_section)) + return 0; + + msleep(delay); + } + + return -1; +} + +/** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure **/ @@ -1988,7 +2012,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - adapter->state = __IAVF_RESETTING; adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -2102,6 +2125,10 @@ static void iavf_reset_task(struct work_struct *work) if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) { + schedule_work(&adapter->reset_task); + return; + } while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section)) usleep_range(500, 1000); @@ -2308,6 +2335,8 @@ static 
void iavf_adminq_task(struct work_struct *work) if (!event.msg_buf) goto out; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) + goto freedom; do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); @@ -2321,6 +2350,7 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || @@ -3625,6 +3655,10 @@ static void iavf_init_task(struct work_struct *work) init_task.work); struct iavf_hw *hw = &adapter->hw; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) { + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + return; + } switch (adapter->state) { case __IAVF_STARTUP: if (iavf_startup(adapter) < 0) @@ -3637,14 +3671,14 @@ static void iavf_init_task(struct work_struct *work) case __IAVF_INIT_GET_RESOURCES: if (iavf_init_get_resources(adapter) < 0) goto init_failed; - return; + goto out; default: goto init_failed; } queue_delayed_work(iavf_wq, &adapter->init_task, msecs_to_jiffies(30)); - return; + goto out; init_failed: if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, @@ -3653,9 +3687,11 @@ init_failed: iavf_shutdown_adminq(hw); adapter->state = __IAVF_STARTUP; queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); - return; + goto out; } queue_delayed_work(iavf_wq, &adapter->init_task, HZ); +out: + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); } /** @@ -3672,9 +3708,12 @@ static void iavf_shutdown(struct pci_dev *pdev) if (netif_running(netdev)) iavf_close(netdev); + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ adapter->state = __IAVF_REMOVE; adapter->aq_required = 0; + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); #ifdef CONFIG_PM pci_save_state(pdev); @@ -3908,10 +3947,6 @@ static void iavf_remove(struct pci_dev *pdev) err); } - /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; - adapter->aq_required = 0; - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. 
*/ @@ -3919,6 +3954,13 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + + /* Shut down all the garbage mashers on the detention level */ + adapter->state = __IAVF_REMOVE; + adapter->aq_required = 0; + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); iavf_misc_irq_disable(adapter); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ef8d1815af56..33916ed9e874 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6558,12 +6558,12 @@ event_after: } /** - * ice_do_ioctl - Access the hwtstamp interface + * ice_eth_ioctl - Access the hwtstamp interface * @netdev: network interface device structure * @ifr: interface request data * @cmd: ioctl command */ -static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; @@ -7229,7 +7229,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, - .ndo_do_ioctl = ice_do_ioctl, + .ndo_eth_ioctl = ice_eth_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index e63ee3cca5ea..1277c5c7d099 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -492,6 +492,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) **/ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) { + int failed_cnt = 3; bool is_failed; int i; @@ -502,9 +503,12 @@ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) is_failed = true; array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); wrfl(); - break; } } + if (is_failed && --failed_cnt <= 0) { + hw_dbg("Failed to update MTA_REGISTER, too many retries"); + break; + } } while (is_failed); } diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 636a1b1fb7e1..17f5c003c3df 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2343,8 +2343,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_TEST: - memcpy(data, *igb_gstrings_test, - IGB_TEST_LEN*ETH_GSTRING_LEN); + memcpy(data, igb_gstrings_test, sizeof(igb_gstrings_test)); break; case ETH_SS_STATS: for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 171a7a629b20..751de06019a0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2991,7 +2991,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, .ndo_change_mtu = igb_change_mtu, - .ndo_do_ioctl = igb_ioctl, + .ndo_eth_ioctl = igb_ioctl, .ndo_tx_timeout = igb_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = 
igb_vlan_rx_add_vid, diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 1bbe9862a758..d32e72d953c8 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2657,7 +2657,7 @@ static const struct net_device_ops igbvf_netdev_ops = { .ndo_set_rx_mode = igbvf_set_rx_mode, .ndo_set_mac_address = igbvf_set_mac, .ndo_change_mtu = igbvf_change_mtu, - .ndo_do_ioctl = igbvf_ioctl, + .ndo_eth_ioctl = igbvf_ioctl, .ndo_tx_timeout = igbvf_tx_timeout, .ndo_vlan_rx_add_vid = igbvf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = igbvf_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 5901ed9fb545..a0ecfe5a4078 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -33,6 +33,8 @@ void igc_ethtool_set_ops(struct net_device *); #define IGC_N_PEROUT 2 #define IGC_N_SDP 4 +#define MAX_FLEX_FILTER 32 + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -476,18 +478,28 @@ struct igc_q_vector { }; enum igc_filter_match_flags { - IGC_FILTER_FLAG_ETHER_TYPE = 0x1, - IGC_FILTER_FLAG_VLAN_TCI = 0x2, - IGC_FILTER_FLAG_SRC_MAC_ADDR = 0x4, - IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8, + IGC_FILTER_FLAG_ETHER_TYPE = BIT(0), + IGC_FILTER_FLAG_VLAN_TCI = BIT(1), + IGC_FILTER_FLAG_SRC_MAC_ADDR = BIT(2), + IGC_FILTER_FLAG_DST_MAC_ADDR = BIT(3), + IGC_FILTER_FLAG_USER_DATA = BIT(4), + IGC_FILTER_FLAG_VLAN_ETYPE = BIT(5), }; struct igc_nfc_filter { u8 match_flags; u16 etype; + __be16 vlan_etype; u16 vlan_tci; u8 src_addr[ETH_ALEN]; u8 dst_addr[ETH_ALEN]; + u8 user_data[8]; + u8 user_mask[8]; + u8 flex_index; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; }; struct igc_nfc_rule { @@ -495,12 +507,24 @@ struct igc_nfc_rule { struct igc_nfc_filter filter; u32 location; u16 action; + bool flex; }; -/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority - * based, and 8 ethertype based. +/* IGC supports a total of 32 NFC rules: 16 MAC address based, 8 VLAN priority + * based, 8 ethertype based and 32 Flex filter based rules. 
*/ -#define IGC_MAX_RXNFC_RULES 32 +#define IGC_MAX_RXNFC_RULES 64 + +struct igc_flex_filter { + u8 index; + u8 data[128]; + u8 mask[16]; + u8 length; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; +}; /* igc_desc_unused - calculate if we have unused descriptors */ static inline u16 igc_desc_unused(const struct igc_ring *ring) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index d0700d48ecf9..84f142f5e472 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -187,15 +187,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) igc_check_for_copper_link(hw); - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case I225_I_PHY_ID: - phy->type = igc_phy_i225; - break; - default: - ret_val = -IGC_ERR_PHY; - goto out; - } + phy->type = igc_phy_i225; out: return ret_val; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index c3a5a5518790..c6315690e20f 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -17,11 +17,22 @@ #define IGC_WUC_PME_EN 0x00000002 /* PME Enable */ /* Wake Up Filter Control */ -#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ -#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ -#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ -#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ -#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_FLEX_HQ BIT(14) /* Flex Filters Host Queuing */ +#define IGC_WUFC_FLX0 BIT(16) /* Flexible Filter 0 Enable */ +#define IGC_WUFC_FLX1 BIT(17) /* Flexible Filter 1 Enable */ +#define IGC_WUFC_FLX2 BIT(18) /* Flexible Filter 2 Enable */ +#define IGC_WUFC_FLX3 BIT(19) /* Flexible Filter 3 Enable */ +#define IGC_WUFC_FLX4 BIT(20) /* Flexible Filter 4 Enable */ +#define IGC_WUFC_FLX5 BIT(21) /* Flexible Filter 5 Enable */ +#define IGC_WUFC_FLX6 BIT(22) /* Flexible Filter 6 Enable */ +#define IGC_WUFC_FLX7 BIT(23) /* Flexible Filter 7 Enable */ + +#define IGC_WUFC_FILTER_MASK GENMASK(23, 14) #define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ @@ -46,6 +57,37 @@ /* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */ #define IGC_WUPM_BYTES 128 +/* Wakeup Filter Control Extended */ +#define IGC_WUFC_EXT_FLX8 BIT(8) /* Flexible Filter 8 Enable */ +#define IGC_WUFC_EXT_FLX9 BIT(9) /* Flexible Filter 9 Enable */ +#define IGC_WUFC_EXT_FLX10 BIT(10) /* Flexible Filter 10 Enable */ +#define IGC_WUFC_EXT_FLX11 BIT(11) /* Flexible Filter 11 Enable */ +#define IGC_WUFC_EXT_FLX12 BIT(12) /* Flexible Filter 12 Enable */ +#define IGC_WUFC_EXT_FLX13 BIT(13) /* Flexible Filter 13 Enable */ +#define IGC_WUFC_EXT_FLX14 BIT(14) /* Flexible Filter 14 Enable */ +#define IGC_WUFC_EXT_FLX15 BIT(15) /* Flexible Filter 15 Enable */ +#define IGC_WUFC_EXT_FLX16 BIT(16) /* Flexible Filter 16 Enable */ +#define IGC_WUFC_EXT_FLX17 BIT(17) /* Flexible Filter 17 Enable */ +#define IGC_WUFC_EXT_FLX18 BIT(18) /* Flexible Filter 18 Enable */ +#define IGC_WUFC_EXT_FLX19 BIT(19) /* 
Flexible Filter 19 Enable */ +#define IGC_WUFC_EXT_FLX20 BIT(20) /* Flexible Filter 20 Enable */ +#define IGC_WUFC_EXT_FLX21 BIT(21) /* Flexible Filter 21 Enable */ +#define IGC_WUFC_EXT_FLX22 BIT(22) /* Flexible Filter 22 Enable */ +#define IGC_WUFC_EXT_FLX23 BIT(23) /* Flexible Filter 23 Enable */ +#define IGC_WUFC_EXT_FLX24 BIT(24) /* Flexible Filter 24 Enable */ +#define IGC_WUFC_EXT_FLX25 BIT(25) /* Flexible Filter 25 Enable */ +#define IGC_WUFC_EXT_FLX26 BIT(26) /* Flexible Filter 26 Enable */ +#define IGC_WUFC_EXT_FLX27 BIT(27) /* Flexible Filter 27 Enable */ +#define IGC_WUFC_EXT_FLX28 BIT(28) /* Flexible Filter 28 Enable */ +#define IGC_WUFC_EXT_FLX29 BIT(29) /* Flexible Filter 29 Enable */ +#define IGC_WUFC_EXT_FLX30 BIT(30) /* Flexible Filter 30 Enable */ +#define IGC_WUFC_EXT_FLX31 BIT(31) /* Flexible Filter 31 Enable */ + +#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8) + +/* Physical Func Reset Done Indication */ +#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 + /* Loop limit on how long we wait for auto-negotiation to complete */ #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index fa4171860623..d3e84416248e 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -979,6 +979,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, eth_broadcast_addr(fsp->m_u.ether_spec.h_source); } + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) { + fsp->flow_type |= FLOW_EXT; + memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data)); + memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data)); + } + mutex_unlock(&adapter->nfc_rule_lock); return 0; @@ -1215,6 +1221,30 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, ether_addr_copy(rule->filter.dst_addr, fsp->h_u.ether_spec.h_dest); } + + /* VLAN etype matching */ + if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) { + rule->filter.vlan_etype = fsp->h_ext.vlan_etype; + rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE; + } + + /* Check for user defined data */ + if ((fsp->flow_type & FLOW_EXT) && + (fsp->h_ext.data[0] || fsp->h_ext.data[1])) { + rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA; + memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data)); + memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data)); + } + + /* When multiple filter options or user data or vlan etype is set, use a + * flex filter. 
+ */ + if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) || + (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) || + (rule->filter.match_flags & (rule->filter.match_flags - 1))) + rule->flex = true; + else + rule->flex = false; } /** @@ -1244,11 +1274,6 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter, return -EINVAL; } - if (flags & (flags - 1)) { - netdev_dbg(dev, "Rule with multiple matches not supported\n"); - return -EOPNOTSUPP; - } - list_for_each_entry(tmp, &adapter->nfc_rule_list, list) { if (!memcmp(&rule->filter, &tmp->filter, sizeof(rule->filter)) && @@ -1280,12 +1305,6 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, return -EOPNOTSUPP; } - if ((fsp->flow_type & FLOW_EXT) && - fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { - netdev_dbg(netdev, "VLAN mask not supported\n"); - return -EOPNOTSUPP; - } - if (fsp->ring_cookie >= adapter->num_rx_queues) { netdev_dbg(netdev, "Invalid action\n"); return -EINVAL; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e29aadbc6744..b7aab35c1132 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3075,11 +3075,320 @@ static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) etype); } +static int igc_flex_filter_select(struct igc_adapter *adapter, + struct igc_flex_filter *input, + u32 *fhft) +{ + struct igc_hw *hw = &adapter->hw; + u8 fhft_index; + u32 fhftsl; + + if (input->index >= MAX_FLEX_FILTER) { + dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n"); + return -EINVAL; + } + + /* Indirect table select register */ + fhftsl = rd32(IGC_FHFTSL); + fhftsl &= ~IGC_FHFTSL_FTSL_MASK; + switch (input->index) { + case 0 ... 7: + fhftsl |= 0x00; + break; + case 8 ... 15: + fhftsl |= 0x01; + break; + case 16 ... 23: + fhftsl |= 0x02; + break; + case 24 ... 31: + fhftsl |= 0x03; + break; + } + wr32(IGC_FHFTSL, fhftsl); + + /* Normalize index down to host table register */ + fhft_index = input->index % 8; + + *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : + IGC_FHFT_EXT(fhft_index - 4); + + return 0; +} + +static int igc_write_flex_filter_ll(struct igc_adapter *adapter, + struct igc_flex_filter *input) +{ + struct device *dev = &adapter->pdev->dev; + struct igc_hw *hw = &adapter->hw; + u8 *data = input->data; + u8 *mask = input->mask; + u32 queuing; + u32 fhft; + u32 wufc; + int ret; + int i; + + /* Length has to be aligned to 8. Otherwise the filter will fail. Bail + * out early to avoid surprises later. + */ + if (input->length % 8 != 0) { + dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n"); + return -EINVAL; + } + + /* Select corresponding flex filter register and get base for host table. */ + ret = igc_flex_filter_select(adapter, input, &fhft); + if (ret) + return ret; + + /* When adding a filter globally disable flex filter feature. That is + * recommended within the datasheet. 
+ */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); + + /* Configure filter */ + queuing = input->length & IGC_FHFT_LENGTH_MASK; + queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK; + queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK; + + if (input->immediate_irq) + queuing |= IGC_FHFT_IMM_INT; + + if (input->drop) + queuing |= IGC_FHFT_DROP; + + wr32(fhft + 0xFC, queuing); + + /* Write data (128 bytes) and mask (128 bits) */ + for (i = 0; i < 16; ++i) { + const size_t data_idx = i * 8; + const size_t row_idx = i * 16; + u32 dw0 = + (data[data_idx + 0] << 0) | + (data[data_idx + 1] << 8) | + (data[data_idx + 2] << 16) | + (data[data_idx + 3] << 24); + u32 dw1 = + (data[data_idx + 4] << 0) | + (data[data_idx + 5] << 8) | + (data[data_idx + 6] << 16) | + (data[data_idx + 7] << 24); + u32 tmp; + + /* Write row: dw0, dw1 and mask */ + wr32(fhft + row_idx, dw0); + wr32(fhft + row_idx + 4, dw1); + + /* mask is only valid for MASK(7, 0) */ + tmp = rd32(fhft + row_idx + 8); + tmp &= ~GENMASK(7, 0); + tmp |= mask[i]; + wr32(fhft + row_idx + 8, tmp); + } + + /* Enable filter. */ + wufc |= IGC_WUFC_FLEX_HQ; + if (input->index >= 8) { + /* Filters 0-7 are enabled via WUFC; filters 8-31 via WUFC_EXT. */ + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); + + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc |= (IGC_WUFC_FLX0 << input->index); + } + wr32(IGC_WUFC, wufc); + + dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n", + input->index); + + return 0; +} + +static void igc_flex_filter_add_field(struct igc_flex_filter *flex, + const void *src, unsigned int offset, + size_t len, const void *mask) +{ + int i; + + /* data */ + memcpy(&flex->data[offset], src, len); + + /* mask */ + for (i = 0; i < len; ++i) { + const unsigned int idx = i + offset; + const u8 *ptr = mask; + + if (mask) { + if (ptr[i] & 0xff) + flex->mask[idx / 8] |= BIT(idx % 8); + + continue; + } + + flex->mask[idx / 8] |= BIT(idx % 8); + } +} + +static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + int i; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + for (i = 0; i < MAX_FLEX_FILTER; i++) { + if (i < 8) { + if (!(wufc & (IGC_WUFC_FLX0 << i))) + return i; + } else { + if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) + return i; + } + } + + return -ENOSPC; +} + +static bool igc_flex_filter_in_use(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + if (wufc & IGC_WUFC_FILTER_MASK) + return true; + + if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) + return true; + + return false; +} + +static int igc_add_flex_filter(struct igc_adapter *adapter, + struct igc_nfc_rule *rule) +{ + struct igc_flex_filter flex = { }; + struct igc_nfc_filter *filter = &rule->filter; + unsigned int eth_offset, user_offset; + int ret, index; + bool vlan; + + index = igc_find_avail_flex_filter_slot(adapter); + if (index < 0) + return -ENOSPC; + + /* Construct the flex filter: + * -> dest_mac [6] + * -> src_mac [6] + * -> tpid [2] + * -> vlan tci [2] + * -> ether type [2] + * -> user data [8] + * -> = 26 bytes => 32 length + */ + flex.index = index; + flex.length = 32; + flex.rx_queue = rule->action; + + vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; + eth_offset = vlan ? 16 : 12; + user_offset = vlan ?
18 : 14; + + /* Add destination MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, + ETH_ALEN, NULL); + + /* Add source MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->src_addr, 6, + ETH_ALEN, NULL); + + /* Add VLAN etype */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) + igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12, + sizeof(filter->vlan_etype), + NULL); + + /* Add VLAN TCI */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) + igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, + sizeof(filter->vlan_tci), NULL); + + /* Add Ether type */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + __be16 etype = cpu_to_be16(filter->etype); + + igc_flex_filter_add_field(&flex, &etype, eth_offset, + sizeof(etype), NULL); + } + + /* Add user data */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) + igc_flex_filter_add_field(&flex, &filter->user_data, + user_offset, + sizeof(filter->user_data), + filter->user_mask); + + /* Write it to the hardware and enable it. */ + ret = igc_write_flex_filter_ll(adapter, &flex); + if (ret) + return ret; + + filter->flex_index = index; + + return 0; +} + +static void igc_del_flex_filter(struct igc_adapter *adapter, + u16 reg_index) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc; + + /* Just disable the filter; the filter table itself is kept + * intact. A later flex_filter_add() simply overwrites the "old" + * data. + */ + if (reg_index >= 8) { + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc = rd32(IGC_WUFC); + + wufc &= ~(IGC_WUFC_FLX0 << reg_index); + wr32(IGC_WUFC, wufc); + } + + if (igc_flex_filter_in_use(adapter)) + return; + + /* No filters are in use, we may disable flex filters */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); +} + static int igc_enable_nfc_rule(struct igc_adapter *adapter, - const struct igc_nfc_rule *rule) + struct igc_nfc_rule *rule) { int err; + if (rule->flex) { + return igc_add_flex_filter(adapter, rule); + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { err = igc_add_etype_filter(adapter, rule->filter.etype, rule->action); @@ -3116,6 +3425,11 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter, static void igc_disable_nfc_rule(struct igc_adapter *adapter, const struct igc_nfc_rule *rule) { + if (rule->flex) { + igc_del_flex_filter(adapter, rule->filter.flex_index); + return; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) igc_del_etype_filter(adapter, rule->filter.etype); @@ -4811,6 +5125,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data) */ static int igc_request_msix(struct igc_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; int i = 0, err = 0, vector = 0, free_vector = 0; struct net_device *netdev = adapter->netdev; @@ -4819,7 +5134,13 @@ static int igc_request_msix(struct igc_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igc_q_vector *q_vector = adapter->q_vector[i]; vector++; @@ -4898,20 +5219,12 @@ bool
igc_has_link(struct igc_adapter *adapter) * false until the igc_check_for_link establishes link * for copper adapters ONLY */ - switch (hw->phy.media_type) { - case igc_media_type_copper: - if (!hw->mac.get_link_status) - return true; - hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - break; - default: - case igc_media_type_unknown: - break; - } + if (!hw->mac.get_link_status) + return true; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; - if (hw->mac.type == igc_i225 && - hw->phy.id == I225_I_PHY_ID) { + if (hw->mac.type == igc_i225) { if (!netif_carrier_ok(adapter->netdev)) { adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { @@ -4999,7 +5312,9 @@ static void igc_watchdog_task(struct work_struct *work) adapter->tx_timeout_factor = 14; break; case SPEED_100: - /* maybe add some timeout factor ? */ + case SPEED_1000: + case SPEED_2500: + adapter->tx_timeout_factor = 7; break; } @@ -5698,7 +6013,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, - .ndo_do_ioctl = igc_ioctl, + .ndo_eth_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 83aeb5e7076f..5cad31c3c7b0 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -249,8 +249,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) return ret_val; } - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) { + if (phy->autoneg_mask & ADVERTISE_2500_FULL) { /* Read the MULTI GBT AN Control Register - reg 7.32 */ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | @@ -390,8 +389,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) + if (phy->autoneg_mask & ADVERTISE_2500_FULL) ret_val = phy->ops.write_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 0f82990567d9..828c3501c448 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -67,6 +67,9 @@ /* Filtering Registers */ #define IGC_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ +#define IGC_FHFT(_n) (0x09000 + (256 * (_n))) /* Flexible Host Filter */ +#define IGC_FHFT_EXT(_n) (0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */ +#define IGC_FHFTSL 0x05804 /* Flex Filter indirect table select */ /* ETQF register bit definitions */ #define IGC_ETQF_FILTER_ENABLE BIT(26) @@ -75,6 +78,19 @@ #define IGC_ETQF_QUEUE_MASK 0x00070000 #define IGC_ETQF_ETYPE_MASK 0x0000FFFF +/* FHFT register bit definitions */ +#define IGC_FHFT_LENGTH_MASK GENMASK(7, 0) +#define IGC_FHFT_QUEUE_SHIFT 8 +#define IGC_FHFT_QUEUE_MASK GENMASK(10, 8) +#define IGC_FHFT_PRIO_SHIFT 16 +#define IGC_FHFT_PRIO_MASK GENMASK(18, 16) +#define IGC_FHFT_IMM_INT BIT(24) +#define IGC_FHFT_DROP BIT(25) + +/* FHFTSL register bit definitions */ +#define IGC_FHFTSL_FTSL_SHIFT 0 +#define IGC_FHFTSL_FTSL_MASK GENMASK(1, 0) + /* Redirection Table - RW Array */ #define IGC_RETA(_i) (0x05C00 + ((_i) * 4)) /* RSS Random Key - RW Array */ @@ 
-240,6 +256,7 @@ #define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */ #define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define IGC_WUFC_EXT 0x0580C /* Wakeup Filter Control Register Extended - RW */ /* Wake Up packet memory */ #define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 174103c4bea6..4dbbb8a32ce9 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -52,7 +52,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_ENDQT(i), NSEC_PER_SEC); } - wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC); + wr32(IGC_QBVCYCLET_S, 0); wr32(IGC_QBVCYCLET, NSEC_PER_SEC); adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 14aea40da50f..24e06ba6f5e9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10247,7 +10247,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_tx_maxrate = ixgbe_tx_maxrate, .ndo_vlan_rx_add_vid = ixgbe_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgbe_vlan_rx_kill_vid, - .ndo_do_ioctl = ixgbe_ioctl, + .ndo_eth_ioctl = ixgbe_ioctl, .ndo_set_vf_mac = ixgbe_ndo_set_vf_mac, .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index f1b9284e0bea..1251b74fe0e2 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2901,7 +2901,7 @@ static const struct net_device_ops jme_netdev_ops = { .ndo_open = jme_open, .ndo_stop = jme_close, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = jme_ioctl, + .ndo_eth_ioctl = jme_ioctl, .ndo_start_xmit = jme_start_xmit, .ndo_set_mac_address = jme_set_macaddr, .ndo_set_rx_mode = jme_set_multi, diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index b30a45725374..3e9f324f1061 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1272,7 +1272,7 @@ static const struct net_device_ops korina_netdev_ops = { .ndo_start_xmit = korina_send_packet, .ndo_set_rx_mode = korina_multicast_list, .ndo_tx_timeout = korina_tx_timeout, - .ndo_do_ioctl = korina_ioctl, + .ndo_eth_ioctl = korina_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 2d0c52f7106b..62f8c5212182 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -609,7 +609,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = { .ndo_stop = ltq_etop_stop, .ndo_start_xmit = ltq_etop_tx, .ndo_change_mtu = ltq_etop_change_mtu, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_set_mac_address = ltq_etop_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = ltq_etop_set_multicast_list, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d207bfcaf31d..6502c5c2ebca 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3060,7 +3060,7 @@ static const struct net_device_ops mv643xx_eth_netdev_ops = { .ndo_set_rx_mode = mv643xx_eth_set_rx_mode, .ndo_set_mac_address = mv643xx_eth_set_mac_address, 
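The .ndo_do_ioctl to .ndo_eth_ioctl renames in this and the surrounding hunks track the core's split of the Ethernet-specific requests - SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG and SIOCSHWTSTAMP/SIOCGHWTSTAMP - out of the generic ioctl path into a dedicated callback, so the conversion is mechanical for drivers whose old handler served nothing else. A sketch of the shape such a handler takes (the foo_* names are placeholders, not any real driver's functions):

static int foo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	if (!netif_running(dev))
		return -EINVAL;

	switch (cmd) {
	case SIOCSHWTSTAMP:
		return foo_hwtstamp_set(dev, ifr);	/* placeholder */
	case SIOCGHWTSTAMP:
		return foo_hwtstamp_get(dev, ifr);	/* placeholder */
	default:
		/* MII register access is typically forwarded to the PHY */
		return phy_mii_ioctl(dev->phydev, ifr, cmd);
	}
}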
.ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mv643xx_eth_ioctl, + .ndo_eth_ioctl = mv643xx_eth_ioctl, .ndo_change_mtu = mv643xx_eth_change_mtu, .ndo_set_features = mv643xx_eth_set_features, .ndo_tx_timeout = mv643xx_eth_tx_timeout, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 76a7777c746d..ff8db311963c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2666,7 +2666,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, return 0; if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) { - pr_info("*** Is this even possible???!?!?\n"); + pr_info("*** Is this even possible?\n"); return 0; } @@ -3832,12 +3832,20 @@ static void mvneta_validate(struct phylink_config *config, struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - /* We only support QSGMII, SGMII, 802.3z and RGMII modes */ - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != PHY_INTERFACE_MODE_QSGMII && - state->interface != PHY_INTERFACE_MODE_SGMII && - !phy_interface_mode_is_8023z(state->interface) && - !phy_interface_mode_is_rgmii(state->interface)) { + /* We only support QSGMII, SGMII, 802.3z and RGMII modes. + * When in 802.3z mode, we must have AN enabled: + * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... + * When <PortType> = 1 (1000BASE-X) this field must be set to 1." + */ + if (phy_interface_mode_is_8023z(state->interface)) { + if (!phylink_test(state->advertising, Autoneg)) { + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + } + } else if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != PHY_INTERFACE_MODE_QSGMII && + state->interface != PHY_INTERFACE_MODE_SGMII && + !phy_interface_mode_is_rgmii(state->interface)) { bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); return; } @@ -4986,7 +4994,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_change_mtu = mvneta_change_mtu, .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, - .ndo_do_ioctl = mvneta_ioctl, + .ndo_eth_ioctl = mvneta_ioctl, .ndo_bpf = mvneta_xdp, .ndo_xdp_xmit = mvneta_xdp_xmit, .ndo_setup_tc = mvneta_setup_tc, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 3229bafa2a2c..99bd8b8aa0e2 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5702,7 +5702,7 @@ static const struct net_device_ops mvpp2_netdev_ops = { .ndo_set_mac_address = mvpp2_set_mac_address, .ndo_change_mtu = mvpp2_change_mtu, .ndo_get_stats64 = mvpp2_get_stats64, - .ndo_do_ioctl = mvpp2_ioctl, + .ndo_eth_ioctl = mvpp2_ioctl, .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid, .ndo_set_features = mvpp2_set_features, @@ -6269,6 +6269,15 @@ static void mvpp2_phylink_validate(struct phylink_config *config, if (!mvpp2_port_supports_rgmii(port)) goto empty_set; break; + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + /* When in 802.3z mode, we must have AN enabled: + * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... + * When <PortType> = 1 (1000BASE-X) this field must be set to 1. 
+ */ + if (!phylink_test(state->advertising, Autoneg)) + goto empty_set; + break; default: break; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 47f5ed006a93..752ba6b4b919 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -146,10 +146,7 @@ enum nix_scheduler { #define TXSCH_RR_QTM_MAX ((1 << 24) - 1) #define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX #define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) -#define MAX_SCHED_WEIGHT 0xFF -#define DFLT_RR_WEIGHT 71 -#define DFLT_RR_QTM ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \ - / MAX_SCHED_WEIGHT) +#define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */ /* Min/Max packet sizes, excluding FCS */ #define NIC_HW_MIN_FRS 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index f5ec39de026a..447093361b7a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1032,8 +1032,12 @@ struct nix_bp_cfg_rsp { struct nix_hw_info { struct mbox_msghdr hdr; + u16 rsvs16; u16 max_mtu; u16 min_mtu; + u32 rpm_dwrr_mtu; + u32 sdp_dwrr_mtu; + u64 rsvd[16]; /* Add reserved fields for future expansion */ }; struct nix_bandprof_alloc_req { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 91503fb2762c..95591e77aea8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -329,6 +329,7 @@ struct hw_cap { bool nix_shaping; /* Is shaping and coloring supported */ bool nix_tx_link_bp; /* Can link backpressure TL queues ? */ bool nix_rx_multicast; /* Rx packet replication support */ + bool nix_common_dwrr_mtu; /* Common DWRR MTU for quantum config */ bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */ bool programmable_chans; /* Channels programmable ? 
*/ bool ipolicer; @@ -706,6 +707,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); +u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu); +u32 convert_bytes_to_dwrr_mtu(u32 bytes); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 2688186066d9..6f963b2f54a7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_nix_health_reporters_destroy(rvu_dl); } +/* Devlink Params APIs */ +static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + int dwrr_mtu = val.vu32; + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) { + NL_SET_ERR_MSG_MOD(extack, + "Setting DWRR_MTU is not supported on this silicon"); + return -EOPNOTSUPP; + } + + if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) && + (dwrr_mtu != 9728 && dwrr_mtu != 10240)) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid, supported MTUs are 0, 2, 4, 8, 16, 32, 64 ... 4K, 8K, 32K, 64K and 9728, 10240"); + return -EINVAL; + } + + nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0); + if (!nix_hw) + return -ENODEV; + + txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ]; + if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) { + NL_SET_ERR_MSG_MOD(extack, + "Changing DWRR MTU is not supported when there are active NIXLFs"); + NL_SET_ERR_MSG_MOD(extack, + "Make sure none of the PF/VF interfaces are initialized and retry"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + u64 dwrr_mtu; + + dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32); + rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu); + + return 0; +} + +static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + u64 dwrr_mtu; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) + return -EOPNOTSUPP; + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu); + + return 0; +} + +enum rvu_af_dl_param_id { + RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, +}; + +static const struct devlink_param rvu_af_dl_params[] = { + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, + "dwrr_mtu", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set, + rvu_af_dl_dwrr_mtu_validate), +}; + +/* Devlink switch mode */ static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); @@ -1438,7 +1521,30 @@ int rvu_register_dl(struct rvu *rvu) rvu_dl->rvu = rvu; rvu->rvu_dl = rvu_dl; - return rvu_health_reporters_create(rvu); + err = rvu_health_reporters_create(rvu); + if (err) { + dev_err(rvu->dev, + "devlink health reporter creation failed with
error %d\n", err); + goto err_dl_health; + } + + err = devlink_params_register(dl, rvu_af_dl_params, + ARRAY_SIZE(rvu_af_dl_params)); + if (err) { + dev_err(rvu->dev, + "devlink params register failed with error %d\n", err); + goto err_dl_health; + } + + devlink_params_publish(dl); + + return 0; + +err_dl_health: + rvu_health_reporters_destroy(rvu); + devlink_unregister(dl); + devlink_free(dl); + return err; } void rvu_unregister_dl(struct rvu *rvu) @@ -1449,6 +1555,8 @@ void rvu_unregister_dl(struct rvu *rvu) if (!dl) return; + devlink_params_unregister(dl, rvu_af_dl_params, + ARRAY_SIZE(rvu_af_dl_params)); rvu_health_reporters_destroy(rvu); devlink_unregister(dl); devlink_free(dl); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4bfbbdf38770..53db8ebddb5e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -192,6 +192,47 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) return NULL; } +u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu) +{ + dwrr_mtu &= 0x1FULL; + + /* MTU used for DWRR calculation is in power of 2 up until 64K bytes. + * Value of 4 is reserved for MTU value of 9728 bytes. + * Value of 5 is reserved for MTU value of 10240 bytes. + */ + switch (dwrr_mtu) { + case 4: + return 9728; + case 5: + return 10240; + default: + return BIT_ULL(dwrr_mtu); + } + + return 0; +} + +u32 convert_bytes_to_dwrr_mtu(u32 bytes) +{ + /* MTU used for DWRR calculation is in power of 2 up until 64K bytes. + * Value of 4 is reserved for MTU value of 9728 bytes. + * Value of 5 is reserved for MTU value of 10240 bytes. + */ + if (bytes > BIT_ULL(16)) + return 0; + + switch (bytes) { + case 9728: + return 4; + case 10240: + return 5; + default: + return ilog2(bytes); + } + + return 0; +} + static void nix_rx_sync(struct rvu *rvu, int blkaddr) { int err; @@ -1958,8 +1999,17 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, return; rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq), (TXSCH_TL1_DFLT_RR_PRIO << 1)); - rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), - TXSCH_TL1_DFLT_RR_QTM); + + /* On OcteonTx2 the config was in bytes; on newer silicons + * it is in terms of weight.
+ */ + if (!rvu->hw->cap.nix_common_dwrr_mtu) + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), + TXSCH_TL1_DFLT_RR_QTM); + else + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), + CN10K_MAX_DWRR_WEIGHT); + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00); pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); } @@ -2667,6 +2717,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) for (schq = 0; schq < txsch->schq.max; schq++) txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); } + + /* Setup a default value of 8192 as DWRR MTU */ + if (rvu->hw->cap.nix_common_dwrr_mtu) { + rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU, + convert_bytes_to_dwrr_mtu(8192)); + rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU, + convert_bytes_to_dwrr_mtu(8192)); + } + return 0; } @@ -2743,6 +2802,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, struct nix_hw_info *rsp) { u16 pcifunc = req->hdr.pcifunc; + u64 dwrr_mtu; int blkaddr; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); @@ -2755,6 +2815,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu); rsp->min_mtu = NIC_HW_MIN_FRS; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) { + /* Return '1' on OTx2 */ + rsp->rpm_dwrr_mtu = 1; + rsp->sdp_dwrr_mtu = 1; + return 0; + } + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU); + rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + return 0; } @@ -3647,6 +3721,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) return 0; } +static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + u64 hw_const; + + hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); + + /* On OcteonTx2 DWRR quantum is directly configured into each of + * the transmit scheduler queues. And PF/VF drivers were free to + * config any value upto 2^24. + * On CN10K, HW is modified, the quantum configuration at scheduler + * queues is in terms of weight. And SW needs to setup a base DWRR MTU + * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do + * 'DWRR MTU * weight' to get the quantum. + * + * Check if HW uses a common MTU for all DWRR quantum configs. + * On OcteonTx2 this register field is '0'. 
+ */ + if (((hw_const >> 56) & 0x10) == 0x10) + hw->cap.nix_common_dwrr_mtu = true; +} + static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) { const struct npc_lt_def_cfg *ltdefs; @@ -3684,6 +3780,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) if (err) return err; + /* Setup capabilities of the NIX block */ + rvu_nix_setup_capabilities(rvu, blkaddr); + /* Initialize admin queue */ err = nix_aq_init(rvu, block); if (err) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 8b01ef6e2c99..6efcf3afff40 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -269,6 +269,8 @@ #define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3) #define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) #define NIX_AF_SQM_DBG_CTL_STATUS (0x750) +#define NIX_AF_DWRR_SDP_MTU (0x790) +#define NIX_AF_DWRR_RPM_MTU (0x7A0) #define NIX_AF_PSE_CHANNEL_LEVEL (0x800) #define NIX_AF_PSE_SHAPER_CFG (0x810) #define NIX_AF_TX_EXPR_CREDIT (0x830) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index 64aa7d350df1..6af97ce69443 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -14,6 +14,8 @@ #include <linux/tracepoint.h> #include <linux/pci.h> +#include "mbox.h" + TRACE_EVENT(otx2_msg_alloc, TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size), TP_ARGS(pdev, id, size), @@ -25,8 +27,8 @@ TRACE_EVENT(otx2_msg_alloc, __entry->id = id; __entry->size = size; ), - TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev), - __entry->id, __entry->size) + TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->size) ); TRACE_EVENT(otx2_msg_send, @@ -88,8 +90,8 @@ TRACE_EVENT(otx2_msg_process, __entry->id = id; __entry->err = err; ), - TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev), - __entry->id, __entry->err) + TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->err) ); #endif /* __RVU_TRACE_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 184de9466286..ccffddad1233 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -92,8 +92,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/ - aq->sq.smq_rr_weight = pfvf->netdev->mtu; + aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h index 1a1ae334477d..e07723d71a26 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -9,6 +9,20 @@ #include "otx2_common.h" +static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu) +{ + u32 weight; + + /* On OTx2, since AF returns DWRR_MTU as '1', this logic + * will work on those silicons as well. 
+ */ + weight = mtu / pfvf->hw.dwrr_mtu; + if (mtu % pfvf->hw.dwrr_mtu) + weight += 1; + + return weight; +} + void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 70fcc1fd962f..ce799b7a8449 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -596,6 +596,9 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) struct otx2_hw *hw = &pfvf->hw; struct nix_txschq_config *req; u64 schq, parent; + u64 dwrr_val; + + dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); if (!req) @@ -621,21 +624,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) req->num_regs++; /* Set DWRR quantum */ req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq); - req->regval[2] = DFLT_RR_QTM; + req->regval[2] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL4) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL3][0]; req->reg[0] = NIX_AF_TL4X_PARENT(schq); req->regval[0] = parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq); - req->regval[1] = DFLT_RR_QTM; + req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL3) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0]; req->reg[0] = NIX_AF_TL3X_PARENT(schq); req->regval[0] = parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq); - req->regval[1] = DFLT_RR_QTM; + req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL2) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0]; req->reg[0] = NIX_AF_TL2X_PARENT(schq); @@ -643,7 +646,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) req->num_regs++; req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq); - req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM; + req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val; req->num_regs++; req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, @@ -656,7 +659,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) * For VF this is always ignored. */ - /* Set DWRR quantum */ + /* On CN10K, if RR_WEIGHT is greater than 16384, HW will + * clip it to 16384, so configuring a 24bit max value + * will work on both OTx2 and CN10K. 
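+ * (TXSCH_TL1_DFLT_RR_QTM is that 24-bit maximum, i.e. (1 << 24) - 1.)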
+ */ req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq); req->regval[0] = TXSCH_TL1_DFLT_RR_QTM; @@ -803,7 +809,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - aq->sq.smq_rr_quantum = DFLT_RR_QTM; + aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; @@ -1668,6 +1674,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf) * SMQ errors */ max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN; + + /* Also save DWRR MTU, needed for DWRR weight calculation */ + pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu; + if (!pfvf->hw.dwrr_mtu) + pfvf->hw.dwrr_mtu = 1; } out: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 8fd58cd07f50..2a80cdc848e5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -181,6 +181,7 @@ struct otx2_hw { /* NIX */ u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; u16 matchall_ipolicer; + u32 dwrr_mtu; /* HW settings, coalescing etc */ u16 rx_chan_base; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2c24944a4dba..22b7af029ebf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2331,7 +2331,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_features = otx2_set_features, .ndo_tx_timeout = otx2_tx_timeout, .ndo_get_stats64 = otx2_get_stats64, - .ndo_do_ioctl = otx2_ioctl, + .ndo_eth_ioctl = otx2_ioctl, .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 226f4ff29f6e..7c569c1abefc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -746,7 +746,8 @@ static int prestera_netdev_port_event(struct net_device *lower, case NETDEV_CHANGEUPPER: if (netif_is_bridge_master(upper)) { if (info->linking) - return prestera_bridge_port_join(upper, port); + return prestera_bridge_port_join(upper, port, + extack); else prestera_bridge_port_leave(upper, port); } else if (netif_is_lag_master(upper)) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 0b3e8f2db294..be01ec8284e6 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -480,7 +480,8 @@ err_port_flood_set: } int prestera_bridge_port_join(struct net_device *br_dev, - struct prestera_port *port) + struct prestera_port *port, + struct netlink_ext_ack *extack) { struct prestera_switchdev *swdev = port->sw->swdev; struct prestera_bridge_port *br_port; @@ -500,6 +501,11 @@ int prestera_bridge_port_join(struct net_device *br_dev, goto err_brport_create; } + err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL, + NULL, NULL, false, extack); + if (err) + goto err_switchdev_offload; + if (bridge->vlan_enabled) return 0; @@ -510,6 +516,8 @@ int prestera_bridge_port_join(struct net_device *br_dev, return 0; 
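+ /* Error labels below unwind in reverse order of setup: undo the + * switchdev offload first, then drop the bridge-port and bridge refs. + */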
err_port_join: + switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL); +err_switchdev_offload: prestera_bridge_port_put(br_port); err_brport_create: prestera_bridge_put(bridge); @@ -584,6 +592,8 @@ void prestera_bridge_port_leave(struct net_device *br_dev, else prestera_bridge_1d_port_leave(br_port); + switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL); + prestera_hw_port_learning_set(port, false); prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD, 0); prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h index a91bc35d235f..0e93fda3d9a5 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h @@ -8,7 +8,8 @@ int prestera_switchdev_init(struct prestera_switch *sw); void prestera_switchdev_fini(struct prestera_switch *sw); int prestera_bridge_port_join(struct net_device *br_dev, - struct prestera_port *port); + struct prestera_port *port, + struct netlink_ext_ack *extack); void prestera_bridge_port_leave(struct net_device *br_dev, struct prestera_port *port); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 9b48ae4bac39..fab53c9b8380 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1377,7 +1377,7 @@ static const struct net_device_ops pxa168_eth_netdev_ops = { .ndo_set_rx_mode = pxa168_eth_set_rx_mode, .ndo_set_mac_address = pxa168_eth_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_change_mtu = pxa168_eth_change_mtu, .ndo_tx_timeout = pxa168_eth_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index d4bb27ba1419..150c06ee3627 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3787,7 +3787,7 @@ static const struct net_device_ops skge_netdev_ops = { .ndo_open = skge_up, .ndo_stop = skge_down, .ndo_start_xmit = skge_xmit_frame, - .ndo_do_ioctl = skge_ioctl, + .ndo_eth_ioctl = skge_ioctl, .ndo_get_stats = skge_get_stats, .ndo_tx_timeout = skge_tx_timeout, .ndo_change_mtu = skge_change_mtu, diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 8b8bff59c8fe..dc9dd77d1ea0 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4693,7 +4693,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { .ndo_open = sky2_open, .ndo_stop = sky2_close, .ndo_start_xmit = sky2_xmit_frame, - .ndo_do_ioctl = sky2_ioctl, + .ndo_eth_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, .ndo_set_rx_mode = sky2_set_multicast, @@ -4710,7 +4710,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { .ndo_open = sky2_open, .ndo_stop = sky2_close, .ndo_start_xmit = sky2_xmit_frame, - .ndo_do_ioctl = sky2_ioctl, + .ndo_eth_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, .ndo_set_rx_mode = sky2_set_multicast, @@ -4884,7 +4884,7 @@ static int sky2_test_msi(struct sky2_hw *hw) /* This driver supports yukon2 chipset only */ static const char *sky2_name(u8 chipid, char *buf, int sz) { - const char *name[] = { + static const char *const name[] = { "XL", /* 
0xb3 */ "EC Ultra", /* 0xb4 */ "Extreme", /* 0xb5 */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 64adfd24e134..398c23cec815 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2933,7 +2933,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_start_xmit = mtk_start_xmit, .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mtk_do_ioctl, + .ndo_eth_ioctl = mtk_do_ioctl, .ndo_change_mtu = mtk_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 96d2891f1675..1d5dd2015453 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1162,7 +1162,7 @@ static const struct net_device_ops mtk_star_netdev_ops = { .ndo_start_xmit = mtk_star_netdev_start_xmit, .ndo_get_stats64 = mtk_star_netdev_get_stats64, .ndo_set_rx_mode = mtk_star_set_rx_mode, - .ndo_do_ioctl = mtk_star_netdev_ioctl, + .ndo_eth_ioctl = mtk_star_netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5d0c9c62382d..a2f61a87cef8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2828,7 +2828,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = mlx4_en_change_mtu, - .ndo_do_ioctl = mlx4_en_ioctl, + .ndo_eth_ioctl = mlx4_en_ioctl, .ndo_tx_timeout = mlx4_en_tx_timeout, .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 427e7a31862c..2584bc038f94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, { int err; int i; - enum mlx4_qp_state states[] = { + static const enum mlx4_qp_state states[] = { MLX4_QP_STATE_RST, MLX4_QP_STATE_INIT, MLX4_QP_STATE_RTR, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index b5072a3a2585..33e550d77fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o @@ -27,7 +27,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o + en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \ + en/rx_res.o 
en/channels.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b1b51bbba054..4f6897c1ea8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -58,6 +58,7 @@ #include "en/qos.h" #include "lib/hv_vhca.h" #include "lib/clock.h" +#include "en/rx_res.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -65,8 +66,6 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) - #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) @@ -126,7 +125,6 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 @@ -139,8 +137,6 @@ struct page_pool; #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 -#define MLX5E_LOG_INDIR_RQT_SIZE 0x8 -#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) @@ -745,29 +741,11 @@ enum { MLX5E_STATE_XDP_ACTIVE, }; -struct mlx5e_rqt { - u32 rqtn; - bool enabled; -}; - -struct mlx5e_tir { - u32 tirn; - struct mlx5e_rqt rqt; - struct list_head list; -}; - enum { MLX5E_TC_PRIO = 0, MLX5E_NIC_PRIO }; -struct mlx5e_rss_params { - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; - u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; - u8 toeplitz_hash_key[40]; - u8 hfunc; -}; - struct mlx5e_modify_sq_param { int curr_state; int next_state; @@ -837,13 +815,7 @@ struct mlx5e_priv { struct mlx5e_channels channels; u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; - struct mlx5e_rqt indir_rqt; - struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_tir ptp_tir; - struct mlx5e_rss_params rss_params; + struct mlx5e_rx_res *rx_res; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -948,25 +920,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_timestamp_init(struct mlx5e_priv *priv); -struct mlx5e_redirect_rqt_param { - bool is_rss; - union { - u32 rqn; /* Direct RQN (Non-RSS) */ - struct { - u8 hfunc; - struct mlx5e_channels *channels; - } rss; /* RSS data */ - }; -}; - -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, - struct mlx5e_redirect_rqt_param rrp); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, - const struct mlx5e_tirc_config *ttconfig, - void *tirc, bool inner); -void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in); -struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); - struct mlx5e_xsk_param; struct mlx5e_rq_param; @@ -1028,9 +981,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels); - int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, 
int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@ -1065,10 +1015,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) extern const struct ethtool_ops mlx5e_ethtool_ops; -int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, - u32 *in); -void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, - struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, @@ -1084,17 +1030,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node); void mlx5e_free_di_list(struct mlx5e_rq *rq); -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); - -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); - int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1106,7 +1041,6 @@ int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); -int mlx5e_bits_invert(unsigned long a, int size); int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); @@ -1183,8 +1117,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c new file mode 100644 index 000000000000..e7c14c0de0a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "channels.h" +#include "en.h" +#include "en/ptp.h" + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) +{ + return chs->num; +} + +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + *rqn = c->rq.rqn; +} + +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + return false; + + *rqn = c->xskrq.rqn; + return true; +} + +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) +{ + struct mlx5e_ptp *c = chs->ptp; + + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return false; + + *rqn = c->rq.rqn; + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h new file mode 100644 index 000000000000..ca00cbc827cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_CHANNELS_H__ +#define __MLX5_EN_CHANNELS_H__ + +#include <linux/kernel.h> + +struct mlx5e_channels; + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); + +#endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index bc33eaada3b9..86e079310ac3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -55,19 +55,15 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) { struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); - if (dl_port->registered) - devlink_port_unregister(dl_port); + devlink_port_unregister(dl_port); } struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct devlink_port *port; if (!netif_device_present(dev)) return NULL; - port = mlx5e_devlink_get_dl_port(priv); - if (port->registered) - return port; - return NULL; + + return mlx5e_devlink_get_dl_port(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1d5ce07b83f4..e348c276eaa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -5,6 +5,7 @@ #define __MLX5E_FLOW_STEER_H__ #include "mod_hdr.h" +#include "lib/fs_ttc.h" enum { MLX5E_TC_FT_LEVEL = 0, @@ -67,27 +68,7 @@ struct mlx5e_l2_table { bool promisc_enabled; }; -enum mlx5e_traffic_types { - MLX5E_TT_IPV4_TCP, - MLX5E_TT_IPV6_TCP, - MLX5E_TT_IPV4_UDP, - MLX5E_TT_IPV6_UDP, - MLX5E_TT_IPV4_IPSEC_AH, - MLX5E_TT_IPV6_IPSEC_AH, - MLX5E_TT_IPV4_IPSEC_ESP, - MLX5E_TT_IPV6_IPSEC_ESP, - MLX5E_TT_IPV4, - MLX5E_TT_IPV6, - MLX5E_TT_ANY, - MLX5E_NUM_TT, - MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, -}; - -struct mlx5e_tirc_config { - u8 l3_prot_type; - u8 l4_prot_type; - u32 rx_hash_fields; -}; +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) 
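+/* Every traffic type except MLX5_TT_ANY gets an indirect (RSS) TIR. */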
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -99,30 +80,6 @@ struct mlx5e_tirc_config { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -enum mlx5e_tunnel_types { - MLX5E_TT_IPV4_GRE, - MLX5E_TT_IPV6_GRE, - MLX5E_TT_IPV4_IPIP, - MLX5E_TT_IPV6_IPIP, - MLX5E_TT_IPV4_IPV6, - MLX5E_TT_IPV6_IPV6, - MLX5E_NUM_TUNNEL_TT, -}; - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); - -struct mlx5e_ttc_rule { - struct mlx5_flow_handle *rule; - struct mlx5_flow_destination default_dest; -}; - -/* L3/L4 traffic type classifier */ -struct mlx5e_ttc_table { - struct mlx5e_flow_table ft; - struct mlx5e_ttc_rule rules[MLX5E_NUM_TT]; - struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; -}; - /* NIC prio FTS */ enum { MLX5E_PROMISC_FT_LEVEL, @@ -144,21 +101,7 @@ enum { #endif }; -#define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ - MLX5E_TTC_GROUP2_SIZE +\ - MLX5E_TTC_GROUP3_SIZE) - -#define MLX5E_INNER_TTC_NUM_GROUPS 3 -#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ - MLX5E_INNER_TTC_GROUP2_SIZE +\ - MLX5E_INNER_TTC_GROUP3_SIZE) +struct mlx5e_priv; #ifdef CONFIG_MLX5_EN_RXNFC @@ -226,8 +169,8 @@ struct mlx5e_flow_steering { struct mlx5e_promisc_table promisc; struct mlx5e_vlan_table *vlan; struct mlx5e_l2_table l2; - struct mlx5e_ttc_table ttc; - struct mlx5e_ttc_table inner_ttc; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; #ifdef CONFIG_MLX5_EN_ARFS struct mlx5e_arfs_tables *arfs; #endif @@ -239,33 +182,13 @@ struct mlx5e_flow_steering { struct mlx5e_ptp_fs *ptp_fs; }; -struct ttc_params { - struct mlx5_flow_table_attr ft_attr; - u32 any_tt_tirn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table *inner_ttc; -}; - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); - -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel); -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest); -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); @@ -273,7 +196,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void 
mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt); int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 909faa6c89d7..7aa25a5e29d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i) } } -static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i) +static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i) { switch (i) { case FS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; default: /* FS_IPV6_UDP */ - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; } } -static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i) +static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i) { switch (i) { - case MLX5E_TT_IPV4_UDP: + case MLX5_TT_IPV4_UDP: return FS_IPV4_UDP; - case MLX5E_TT_IPV6_UDP: + case MLX5_TT_IPV6_UDP: return FS_IPV6_UDP; default: return FS_UDP_NUM_TYPES; @@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port) { enum fs_udp_type type = tt2fs_udp(ttc_type); @@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ fs_udp = priv->fs.udp; fs_udp_t = &fs_udp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type)); rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv) for (i = 0; i < FS_UDP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.udp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", @@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv) fs_any = priv->fs.any; fs_any_t = &fs_any->table; - dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY); rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv) int err; /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, 
MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; @@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.any->table.t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h index 8385df24eb99..7a70c4f38fda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); /* UDP traffic type redirect */ struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port); void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 2cbf18c967f7..3cbb596821e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -167,6 +167,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0; } +struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params) +{ + struct mlx5e_lro_param lro_param; + + lro_param = (struct mlx5e_lro_param) { + .enabled = params->lro_en, + .timeout = params->lro_timeout, + }; + + return lro_param; +} + u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index e9593f5f0661..879ad46d754e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -11,6 +11,11 @@ struct mlx5e_xsk_param { u16 chunk_size; }; +struct mlx5e_lro_param { + bool enabled; + u32 timeout; +}; + struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; @@ -120,6 +125,7 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params); /* Build queue parameters */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index efef4adce086..f479ef31ca40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -605,9 +605,9 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { + u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; struct mlx5_flow_handle *rule; - u32 tirn = priv->ptp_tir.tirn; int err; if (ptp_fs->valid) @@ -617,7 +617,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) if 
(err) goto out_free; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -625,7 +625,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) } ptp_fs->udp_v4_rule = rule; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c new file mode 100644 index 000000000000..b915fb29dd2c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "rqt.h" +#include <linux/mlx5/transobj.h> + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir->table[i] = i % num_channels; +} + +static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u16 max_size, u32 *init_rqns, u16 init_size) +{ + void *rqtc; + int inlen; + int err; + u32 *in; + int i; + + rqt->mdev = mdev; + rqt->size = max_size; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size); + for (i = 0; i < init_size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]); + + err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn); + + kvfree(in); + return err; +} + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn) +{ + u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1; + + return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1); +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { + unsigned int ix = i; + + if (hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE)); + + ix = indir->table[ix]; + + if (WARN_ON(ix >= num_rqns)) + /* Could be a bug in the driver or in the kernel part of + * ethtool: indir table refers to non-existent RQs. 
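+ * (Note that the ETH_RSS_HASH_XOR case above walks the table in + * bit-reversed order: with MLX5E_INDIR_RQT_SIZE = 256, i = 1 reads + * indir->table[mlx5e_bits_invert(1, 8)], i.e. entry 128.)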
+ */ + return -EINVAL; + rss_rqns[i] = rqns[ix]; + } + + return 0; +} + +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} + +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) +{ + mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); +} + +static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size) +{ + unsigned int i; + void *rqtc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + MLX5_SET(rqtc, rqtc, rqt_actual_size, size); + for (i = 0; i < size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + + err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn) +{ + return mlx5e_rqt_redirect(rqt, &rqn, 1); +} + +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE)) + return -EINVAL; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h new file mode 100644 index 000000000000..60c985a12f24 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5_EN_RQT_H__ +#define __MLX5_EN_RQT_H__ + +#include <linux/kernel.h> + +#define MLX5E_INDIR_RQT_SIZE (1 << 8) + +struct mlx5_core_dev; + +struct mlx5e_rss_params_indir { + u32 table[MLX5E_INDIR_RQT_SIZE]; +}; + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels); + +struct mlx5e_rqt { + struct mlx5_core_dev *mdev; + u32 rqtn; + u16 size; +}; + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn); +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt); + +static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) +{ + return rqt->rqtn; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn); +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); + +#endif /* __MLX5_EN_RQT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c new file mode 100644 index 000000000000..e2a8fe13f29d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -0,0 +1,831 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "rx_res.h" +#include "channels.h" +#include "params.h" + +static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5_TT_IPV4_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5_TT_IPV6] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) +{ + return rss_default_config[tt]; +} + +struct mlx5e_rx_res { + struct mlx5_core_dev *mdev; + enum mlx5e_rx_res_features features; + unsigned int max_nch; + u32 drop_rqn; + + struct { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + } rss_params; + + struct mlx5e_rqt indir_rqt; + struct { + struct 
mlx5e_tir indir_tir; + struct mlx5e_tir inner_indir_tir; + } rss[MLX5E_NUM_INDIR_TIRS]; + + bool rss_active; + u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; + unsigned int rss_nch; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + struct mlx5e_rqt xsk_rqt; + struct mlx5e_tir xsk_tir; + } *channels; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); +} + +static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch) +{ + enum mlx5_traffic_types tt; + + res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + res->rss_params.rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + u32 indir_rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn); + if (err) + goto out; + + indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false); + + err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!inner_ft_support) + goto out; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true); + + err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_inner_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_inner_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + tt = MLX5E_NUM_INDIR_TIRS; +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err = 
0; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL); + if (!res->channels) { + err = -ENOMEM; + goto out; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + goto out; + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_xsk_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + + ix = res->max_nch; +err_destroy_xsk_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + + ix = res->max_nch; +err_destroy_direct_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + + ix = res->max_nch; +err_destroy_direct_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + kvfree(res->channels); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn); + if (err) + goto out; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true); + if (err) + goto err_destroy_ptp_rqt; + + goto out; + +err_destroy_ptp_rqt: + mlx5e_rqt_destroy(&res->ptp.rqt); + +out: + mlx5e_tir_builder_free(builder); + return err; +} + +static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + if (res->features & 
MLX5E_RX_RES_FEATURE_INNER_FT) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); +} + +static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) +{ + unsigned int ix; + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + } + + kvfree(res->channels); +} + +static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_tir_destroy(&res->ptp.tir); + mlx5e_rqt_destroy(&res->ptp.rqt); +} + +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch) +{ + int err; + + res->mdev = mdev; + res->features = features; + res->max_nch = max_nch; + res->drop_rqn = drop_rqn; + + mlx5e_rx_res_rss_params_init(res, init_nch); + + err = mlx5e_rx_res_rss_init(res, init_lro_param); + if (err) + return err; + + err = mlx5e_rx_res_channels_init(res, init_lro_param); + if (err) + goto err_rss_destroy; + + err = mlx5e_rx_res_ptp_init(res); + if (err) + goto err_channels_destroy; + + return 0; + +err_channels_destroy: + mlx5e_rx_res_channels_destroy(res); +err_rss_destroy: + mlx5e_rx_res_rss_destroy(res); + return err; +} + +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_rx_res_ptp_destroy(res); + mlx5e_rx_res_channels_destroy(res); + mlx5e_rx_res_rss_destroy(res); +} + +void mlx5e_rx_res_free(struct mlx5e_rx_res *res) +{ + kvfree(res); +} + +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); +} + +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK)); + + return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)); + return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP)); + return mlx5e_tir_get_tirn(&res->ptp.tir); +} + +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); +} + +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = true; + + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); +} + +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = false; + + err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err); +} + +void 
mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; + + nch = mlx5e_channels_get_num(chs); + + for (ix = 0; ix < chs->num; ix++) + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + res->rss_nch = chs->num; + + mlx5e_rx_res_rss_enable(res); + + for (ix = 0; ix < nch; ix++) { + u32 rqn; + + mlx5e_channels_get_regular_rqn(chs, ix, &rqn); + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + rqn = res->drop_rqn; + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + } + for (ix = nch; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + u32 rqn; + + if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) + rqn = res->drop_rqn; + + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + rqn, err); + } +} + +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) +{ + unsigned int ix; + int err; + + mlx5e_rx_res_rss_disable(res); + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + res->drop_rqn, err); + } +} + +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix) +{ + u32 rqn; + int err; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + return -EINVAL; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n", +
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + return err; +} + +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) +{ + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + return err; +} + +struct mlx5e_rss_params_traffic_type +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt]; + return rss_tt; +} + +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch); + + if (!res->rss_active) + return; + + mlx5e_rx_res_rss_enable(res); +} + +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = res->rss_params.indir.table[i]; + + if (key) + memcpy(key, res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = res->rss_params.hash.hfunc; +} + +static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner); + tir = inner ? 
&res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir; + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc) +{ + enum mlx5_traffic_types tt; + bool changed_indir = false; + bool changed_hash = false; + int err; + + if (hfunc && *hfunc != res->rss_params.hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + return -EINVAL; + } + changed_hash = true; + changed_indir = true; + res->rss_params.hash.hfunc = *hfunc; + } + + if (key) { + if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(res->rss_params.hash.toeplitz_hash_key, key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + res->rss_params.indir.table[i] = indir[i]; + } + + if (changed_indir && res->rss_active) { + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); + } + + if (changed_hash) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + return 0; +} + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return res->rss_params.rx_hash_fields[tt]; +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = res->rss_params.rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + res->rss_params.rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) { + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + return 0; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. 
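+ * At this point the outer TIR carries the new hash fields while the + * inner one still has the old; reverting the outer TIR keeps the pair + * consistent with the restored rx_hash_fields.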
+ */ + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rx_res_rss_update_tir(res, tt, false)) + mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + unsigned int ix; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_lro(builder, lro_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err); + if (!final_err) + final_err = err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err); + if (!final_err) + final_err = err; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n", + mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) +{ + return res->rss_params.hash; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h new file mode 100644 index 000000000000..1baeec5158a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
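+ * + * mlx5e_rx_res owns the netdev RX steering objects: the direct and RSS + * RQTs and TIRs, plus the optional XSK and PTP ones. The expected + * lifecycle is roughly: + * + *	res = mlx5e_rx_res_alloc(); + *	mlx5e_rx_res_init(res, mdev, features, max_nch, drop_rqn, + *			  &lro_param, init_nch); + *	mlx5e_rx_res_channels_activate(res, chs); + *	... the tirn/rqtn getters feed flow steering rules ... + *	mlx5e_rx_res_channels_deactivate(res); + *	mlx5e_rx_res_destroy(res); + *	mlx5e_rx_res_free(res);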
*/ + +#ifndef __MLX5_EN_RX_RES_H__ +#define __MLX5_EN_RX_RES_H__ + +#include <linux/kernel.h> +#include "rqt.h" +#include "tir.h" +#include "fs.h" + +struct mlx5e_rx_res; + +struct mlx5e_channels; +struct mlx5e_rss_params_hash; + +enum mlx5e_rx_res_features { + MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), + MLX5E_RX_RES_FEATURE_XSK = BIT(1), + MLX5E_RX_RES_FEATURE_PTP = BIT(2), +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +/* Setup */ +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch); +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); +void mlx5e_rx_res_free(struct mlx5e_rx_res *res); + +/* TIRN getters for flow steering */ +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); + +/* RQTN getters for modules that create their own TIRs */ +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); + +/* Activate/deactivate API */ +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix); +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix); + +/* Configuration API */ +struct mlx5e_rss_params_traffic_type +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc); +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param); + +/* Workaround for hairpin */ +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); + +#endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c new file mode 100644 index 000000000000..de936dc4bc48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "tir.h" +#include "params.h" +#include <linux/mlx5/transobj.h> + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) + +/* max() doesn't work inside square brackets. */ +#define MLX5E_TIR_CMD_IN_SZ_DW ( \ + MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? 
\ + MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \ +) + +struct mlx5e_tir_builder { + u32 in[MLX5E_TIR_CMD_IN_SZ_DW]; + bool modify; +}; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) +{ + struct mlx5e_tir_builder *builder; + + builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + if (!builder) + return NULL; + + builder->modify = modify; + + return builder; +} + +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder) +{ + kvfree(builder); +} + +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder) +{ + memset(builder->in, 0, sizeof(builder->in)); +} + +static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder) +{ + if (builder->modify) + return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx); + return MLX5_ADDR_OF(create_tir_in, builder->in, ctx); +} + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); + MLX5_SET(tirc, tirc, inline_rqn, rqn); +} + +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support); +} + +void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, + const struct mlx5e_lro_param *lro_param) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + const unsigned int rough_max_l2_l3_hdr_sz = 256; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1); + + if (!lro_param->enabled) + return; + + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout); +} + +static int mlx5e_hfunc_to_hw(u8 hfunc) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + return MLX5_RX_HASH_FN_TOEPLITZ; + case ETH_RSS_HASH_XOR: + return MLX5_RX_HASH_FN_INVERTED_XOR8; + default: + return MLX5_RX_HASH_FN_NONE; + } +} + +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + void *hfso; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1); + + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc)); + if (rss_hash->hfunc == ETH_RSS_HASH_TOP) { + const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, rss_hash->toeplitz_hash_key, len); + } + + if (inner) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields,
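+ /* a bitmask of MLX5_HASH_FIELD_SEL_* values, kept per traffic type and configurable via ethtool */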
rss_tt->rx_hash_fields); +} + +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, tls_en, 1); + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); +} + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg) +{ + int err; + + tir->mdev = mdev; + + err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn); + if (err) + return err; + + if (reg) { + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + mutex_lock(&res->td.list_lock); + list_add(&tir->list, &res->td.tirs_list); + mutex_unlock(&res->td.list_lock); + } else { + INIT_LIST_HEAD(&tir->list); + } + + return 0; +} + +void mlx5e_tir_destroy(struct mlx5e_tir *tir) +{ + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + /* Skip mutex if list_del is no-op (the TIR wasn't registered in the + * list). list_empty will never return true for an item of tirs_list, + * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency + * of the list->next value. + */ + if (!list_empty(&tir->list)) { + mutex_lock(&res->td.list_lock); + list_del(&tir->list); + mutex_unlock(&res->td.list_lock); + } + + mlx5_core_destroy_tir(tir->mdev, tir->tirn); +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder) +{ + return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h new file mode 100644 index 000000000000..e45149a78ed9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
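+ * + * A TIR (transport interface receive) delivers packets either straight + * to an RQ (direct/inline dispatching) or through an RQT for RSS. The + * builder wraps the create/modify TIR commands: compose the context with + * the build_*() helpers, then apply it with mlx5e_tir_init() or + * mlx5e_tir_modify().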
*/ + +#ifndef __MLX5_EN_TIR_H__ +#define __MLX5_EN_TIR_H__ + +#include <linux/kernel.h> + +struct mlx5e_rss_params_hash { + u8 hfunc; + u8 toeplitz_hash_key[40]; +}; + +struct mlx5e_rss_params_traffic_type { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +struct mlx5e_tir_builder; +struct mlx5e_lro_param; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify); +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder); + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn); +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support); +void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, + const struct mlx5e_lro_param *lro_param); +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner); +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); + +struct mlx5_core_dev; + +struct mlx5e_tir { + struct mlx5_core_dev *mdev; + u32 tirn; + struct list_head list; +}; + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg); +void mlx5e_tir_destroy(struct mlx5e_tir *tir); + +static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir) +{ + return tir->tirn; +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder); + +#endif /* __MLX5_EN_TIR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 7f94508594fb..d54607a42740 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -92,30 +92,19 @@ static void mlx5e_close_trap_rq(struct mlx5e_rq *rq) static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqn) { - void *tirc; - int inlen; - u32 *in; + struct mlx5e_tir_builder *builder; int err; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, rqn); - err = mlx5e_create_tir(mdev, tir, in); - kvfree(in); + mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn); + err = mlx5e_tir_init(tir, builder, mdev, true); - return err; -} + mlx5e_tir_builder_free(builder); -static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) -{ - mlx5e_destroy_tir(mdev, tir); + return err; } static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, @@ -173,7 +162,7 @@ err_napi_del: void mlx5e_close_trap(struct mlx5e_trap *trap) { - mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir); + mlx5e_tir_destroy(&trap->tir); mlx5e_close_trap_rq(&trap->rq); netif_napi_del(&trap->napi); kvfree(trap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 71e8d66fa150..7b562d2c8a19 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, * any Fill Ring entries at the setup stage. */ - err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix); if (unlikely(err)) goto err_deactivate; @@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) goto remove_pool; c = priv->channels.c[ix]; - mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix); mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index a8315f166696..c06267477b27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -183,73 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) mlx5e_deactivate_rq(&c->xskrq); /* TX queue is disabled on close. */ } - -static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) -{ - struct mlx5e_redirect_rqt_param direct_rrp = { - .is_rss = false, - { - .rqn = rqn, - }, - }; - - u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; - - return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); -} - -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) -{ - return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); -} - -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) -{ - return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); -} - -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int err, i; - - if (!priv->xsk.refcnt) - return 0; - - for (i = 0; i < chs->num; i++) { - struct mlx5e_channel *c = chs->c[i]; - - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) - continue; - - err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); - if (unlikely(err)) - goto err_stop; - } - - return 0; - -err_stop: - for (i--; i >= 0; i--) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } - - return err; -} - -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int i; - - if (!priv->xsk.refcnt) - return; - - for (i = 0; i < chs->num; i++) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index ca20f1ff5e39..50e111b85efd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); void mlx5e_deactivate_xsk(struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); #endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index e51f60b55daa..4c4ee524176c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp { struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES]; }; -static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) +static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) { switch (i) { case ACCEL_FS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; default: /* ACCEL_FS_IPV6_TCP */ - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; } } @@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv, fs_tcp = priv->fs.accel_tcp; accel_fs_t = &fs_tcp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type)); rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.accel_tcp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 34119ce92031..17da23dff0ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx { }; /* IPsec RX flow steering */ -static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i) +static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i) { if (i == ACCEL_FS_ESP4) - return MLX5E_TT_IPV4_IPSEC_ESP; - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; } static int rx_err_add_rule(struct mlx5e_priv *priv, @@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type) accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; - fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type)); + fs_prot->default_dest = + mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type)); err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err); if (err) @@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) /* connect */ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = fs_prot->ft; - mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest); + mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest); out: mutex_unlock(&fs_prot->prot_mutex); @@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type) goto out; /* disconnect */ - mlx5e_ttc_fwd_default_dest(priv, 
fs_esp2tt(type)); + mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type)); /* remove FT */ rx_destroy(priv, type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 4e58fade7a60..62abce008c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -49,7 +49,7 @@ struct mlx5e_ktls_offload_context_rx { struct mlx5e_rq_stats *rq_stats; struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; - u32 tirn; + struct mlx5e_tir tir; u32 key_id; u32 rxq; DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); @@ -99,31 +99,22 @@ mlx5e_ktls_rx_resync_create_resp_list(void) return resp_list; } -static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) +static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn) { - int err, inlen; - void *tirc; - u32 *in; + struct mlx5e_tir_builder *builder; + int err; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - - MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); - MLX5_SET(tirc, tirc, indirect_table, rqtn); - MLX5_SET(tirc, tirc, tls_en, 1); - MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); + mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + err = mlx5e_tir_init(tir, builder, mdev, false); - err = mlx5_core_create_tir(mdev, in, tirn); + mlx5e_tir_builder_free(builder); - kvfree(in); return err; } @@ -139,7 +130,8 @@ static void accel_rule_handle_work(struct work_struct *work) goto out; rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk, - priv_rx->tirn, MLX5_FS_DEFAULT_FLOW_TAG); + mlx5e_tir_get_tirn(&priv_rx->tir), + MLX5_FS_DEFAULT_FLOW_TAG); if (!IS_ERR_OR_NULL(rule)) accel_rule->rule = rule; out: @@ -173,8 +165,8 @@ post_static_params(struct mlx5e_icosq *sq, pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info, - priv_rx->tirn, priv_rx->key_id, - priv_rx->resync.seq, false, + mlx5e_tir_get_tirn(&priv_rx->tir), + priv_rx->key_id, priv_rx->resync.seq, false, TLS_OFFLOAD_CTX_DIR_RX); wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS, @@ -202,8 +194,9 @@ post_progress_params(struct mlx5e_icosq *sq, pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); - mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_rx->tirn, false, - next_record_tcp_sn, + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, + mlx5e_tir_get_tirn(&priv_rx->tir), + false, next_record_tcp_sn, TLS_OFFLOAD_CTX_DIR_RX); wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS, @@ -325,7 +318,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, psv = &wqe->psv; psv->num_psv = 1 << 4; psv->l_key = sq->channel->mkey_be; - psv->psv_index[0] = cpu_to_be32(priv_rx->tirn); + psv->psv_index[0] = 
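+ /* the PSV of a TLS RX context is addressed by the TIR number */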
cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir)); psv->va = cpu_to_be64(buf->dma_addr); wi = (struct mlx5e_icosq_wqe_info) { @@ -635,9 +628,9 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = priv->direct_tir[rxq].rqt.rqtn; + rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); - err = mlx5e_ktls_create_tir(mdev, &priv_rx->tirn, rqtn); + err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); if (err) goto err_create_tir; @@ -658,7 +651,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, return 0; err_post_wqes: - mlx5_core_destroy_tir(mdev, priv_rx->tirn); + mlx5e_tir_destroy(&priv_rx->tir); err_create_tir: mlx5_ktls_destroy_key(mdev, priv_rx->key_id); err_create_key: @@ -693,7 +686,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule); - mlx5_core_destroy_tir(mdev, priv_rx->tirn); + mlx5e_tir_destroy(&priv_rx->tir); mlx5_ktls_destroy_key(mdev, priv_rx->key_id); /* priv_rx should normally be freed here, but if there is an outstanding * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 25403af32859..fe5d82fa6e92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -98,17 +98,17 @@ struct arfs_rule { for (j = 0; j < ARFS_HASH_SIZE; j++) \ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) -static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) { switch (type) { case ARFS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case ARFS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case ARFS_IPV6_TCP: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case ARFS_IPV6_UDP: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; default: return -EINVAL; } @@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { /* Modify ttc rules destination back to their default */ - err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { dest.ft = priv->fs.arfs->arfs_tables[i].ft.t; /* Modify ttc rules destination to point on the aRFS FTs */ - err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", @@ -192,10 +192,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; - struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - enum mlx5e_traffic_types tt; + enum mlx5_traffic_types tt; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; @@ -206,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, return -EINVAL; } - /* FIXME: Must use mlx5e_ttc_get_default_dest(), + /* FIXME: Must use mlx5_ttc_get_default_dest(), * but can't since TTC 
default is not setup yet ! */ - dest.tir_num = tir[tt].tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); @@ -553,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -576,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv, int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst.tir_num = priv->direct_tir[rxq].tirn; + dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 8c166ee56d8b..c4db367d4baf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -36,33 +36,6 @@ * Global resources are common to all the netdevices crated on the same nic. */ -int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in) -{ - struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; - int err; - - err = mlx5_core_create_tir(mdev, in, &tir->tirn); - if (err) - return err; - - mutex_lock(&res->td.list_lock); - list_add(&tir->list, &res->td.tirs_list); - mutex_unlock(&res->td.list_lock); - - return 0; -} - -void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, - struct mlx5e_tir *tir) -{ - struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; - - mutex_lock(&res->td.list_lock); - mlx5_core_destroy_tir(mdev, tir->tirn); - list_del(&tir->list); - mutex_unlock(&res->td.list_lock); -} - void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) { bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bd72572e03d1..2cf59bb5f898 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->rss_params.toeplitz_hash_key); + return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -1198,18 +1198,10 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rss_params *rss = &priv->rss_params; - if (indir) - memcpy(indir, rss->indirection_rqt, - sizeof(rss->indirection_rqt)); - - if (key) - memcpy(key, rss->toeplitz_hash_key, - sizeof(rss->toeplitz_hash_key)); - - if (hfunc) - *hfunc = rss->hfunc; + mutex_lock(&priv->state_lock); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc); + mutex_unlock(&priv->state_lock); return 0; } @@ -1218,64 +1210,13 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rss_params *rss = &priv->rss_params; - int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - bool refresh_tirs = false; - bool refresh_rqt = 
false; - void *in; - - if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && - (hfunc != ETH_RSS_HASH_XOR) && - (hfunc != ETH_RSS_HASH_TOP)) - return -EINVAL; - - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; + int err; mutex_lock(&priv->state_lock); - - if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { - rss->hfunc = hfunc; - refresh_rqt = true; - refresh_tirs = true; - } - - if (indir) { - memcpy(rss->indirection_rqt, indir, - sizeof(rss->indirection_rqt)); - refresh_rqt = true; - } - - if (key) { - memcpy(rss->toeplitz_hash_key, key, - sizeof(rss->toeplitz_hash_key)); - refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP; - } - - if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .hfunc = rss->hfunc, - .channels = &priv->channels, - }, - }, - }; - u32 rqtn = priv->indir_rqt.rqtn; - - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); - } - - if (refresh_tirs) - mlx5e_modify_tirs_hash(priv, in); - + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); mutex_unlock(&priv->state_lock); - - kvfree(in); - - return 0; + return err; } #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 0b75fab41ae8..5c754e9af669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) if (!spec) return -ENOMEM; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); rule_p = &priv->fs.promisc.rule; *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -854,593 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) ft->t = NULL; } -static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) -{ - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { - mlx5_del_flow_rules(ttc->rules[i].rule); - ttc->rules[i].rule = NULL; - } - } - - for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { - mlx5_del_flow_rules(ttc->tunnel_rules[i]); - ttc->tunnel_rules[i] = NULL; - } - } -} - -struct mlx5e_etype_proto { - u16 etype; - u8 proto; -}; - -static struct mlx5e_etype_proto ttc_rules[] = { - [MLX5E_TT_IPV4_TCP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV6_TCP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV4_UDP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV6_UDP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV4_IPSEC_AH] = { - .etype = ETH_P_IP, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV6_IPSEC_AH] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV4] = { - .etype = ETH_P_IP, - .proto = 0, - }, - [MLX5E_TT_IPV6] = { - .etype = ETH_P_IPV6, - .proto = 0, - }, - [MLX5E_TT_ANY] = { - .etype = 0, - .proto = 0, - }, -}; - -static struct mlx5e_etype_proto ttc_tunnel_rules[] = { - [MLX5E_TT_IPV4_GRE] = { - .etype = ETH_P_IP, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV6_GRE] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV4_IPIP] 
= { - .etype = ETH_P_IP, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV6_IPIP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV4_IPV6] = { - .etype = ETH_P_IP, - .proto = IPPROTO_IPV6, - }, - [MLX5E_TT_IPV6_IPV6] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPV6, - }, - -}; - -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt) -{ - return ttc_tunnel_rules[tt].proto; -} - -static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type) -{ - switch (proto_type) { - case IPPROTO_GRE: - return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); - case IPPROTO_IPIP: - case IPPROTO_IPV6: - return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || - MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); - default: - return false; - } -} - -static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) -{ - int tt; - - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto)) - return true; - } - return false; -} - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) -{ - return (mlx5e_tunnel_any_rx_proto_supported(mdev) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); -} - -static u8 mlx5e_etype_to_ipv(u16 ethertype) -{ - if (ethertype == ETH_P_IP) - return 4; - - if (ethertype == ETH_P_IPV6) - return 6; - - return 0; -} - -static struct mlx5_flow_handle * -mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, - u8 proto) -{ - int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); - } - - ipv = mlx5e_etype_to_ipv(etype); - if (match_ipv_outer && ipv) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); - } else if (etype) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? 
ERR_PTR(err) : rule; -} - -static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_handle **trules; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int tt; - int err; - - ft = ttc->ft.t; - rules = ttc->rules; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return 0; - - trules = ttc->tunnel_rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = params->inner_ttc->ft.t; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (!mlx5e_tunnel_proto_supported_rx(priv->mdev, - ttc_tunnel_rules[tt].proto)) - continue; - trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_tunnel_rules[tt].etype, - ttc_tunnel_rules[tt].proto); - if (IS_ERR(trules[tt])) { - err = PTR_ERR(trules[tt]); - trules[tt] = NULL; - goto del_rules; - } - } - - return 0; - -del_rules: - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, - bool use_ipv) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, - sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - if (use_ipv) - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); - else - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -static struct mlx5_flow_handle * -mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, u8 proto) -{ - MLX5_DECLARE_FLOW_ACT(flow_act); - struct 
mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - ipv = mlx5e_etype_to_ipv(etype); - if (etype && ipv) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); - } - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? ERR_PTR(err) : rule; -} - -static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int err; - int tt; - - ft = ttc->ft.t; - rules = ttc->rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - return 0; - -del_rules: - - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; 
-} - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, - struct ttc_params *ttc_params) -{ - ttc_params->any_tt_tirn = priv->direct_tir[0].tirn; - ttc_params->inner_ttc = &priv->fs.inner_ttc; -} - -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res, + tt); + } } -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; -} - -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5e_flow_table *ft = &ttc->ft; - int err; - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return 0; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ?
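+ /* MLX5_TT_ANY is the catch-all: it goes to channel 0's direct TIR rather than an RSS TIR */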
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); } - err = mlx5e_create_inner_ttc_table_groups(ttc); - if (err) - goto err; - - err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; - -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + ttc_params->inner_ttc = tunnel; + if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev)) return; - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - struct mlx5e_flow_table *ft = &ttc->ft; - int err; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(priv->fs.inner_ttc); } - - err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); - if (err) - goto err; - - err = mlx5e_generate_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest) -{ - return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL); -} - -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest; - - WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, - "TTC[%d] default dest is not setup yet", type); - - return *dest; -} - -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type); - - return mlx5e_ttc_fwd_dest(priv, type, &dest); } static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, @@ -1473,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); switch (type) { case MLX5E_FULLMATCH: @@ -1769,10 +1235,46 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) kvfree(priv->fs.vlan); } -int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return; + mlx5_destroy_ttc_table(priv->fs.inner_ttc); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_ttc_table(priv->fs.ttc); +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) { struct ttc_params ttc_params = {}; - int tt, err; + + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + mlx5e_set_inner_ttc_params(priv, &ttc_params); + priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if
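+ /* mlx5_create_ttc_table() reports failure via ERR_PTR(), never NULL */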
(IS_ERR(priv->fs.inner_ttc)) + return PTR_ERR(priv->fs.inner_ttc); + return 0; +} + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + + mlx5e_set_ttc_params(priv, &ttc_params, true); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) + return PTR_ERR(priv->fs.ttc); + return 0; +} + +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +{ + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -1787,23 +1289,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); + err = mlx5e_create_inner_ttc_table(priv); if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + netdev_err(priv->netdev, + "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -1837,9 +1331,9 @@ err_destory_vlan_table: err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_ttc_table(priv); err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1851,8 +1345,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_ptp_free_rx_fs(priv); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index b416a8ee2eed..3d8918f9399e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -421,11 +421,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } else { struct mlx5e_params *params = &priv->channels.params; enum mlx5e_rq_group group; - struct mlx5e_tir *tir; u16 ix; mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); - tir = group == MLX5E_RQ_GROUP_XSK ? 
priv->xsk_tir : priv->direct_tir; dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { @@ -434,7 +432,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = tir[ix].tirn; + if (group == MLX5E_RQ_GROUP_XSK) + dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix); + else + dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -785,45 +786,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } -static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +static int flow_type_to_traffic_type(u32 flow_type) { switch (flow_type) { case TCP_V4_FLOW: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case TCP_V6_FLOW: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case UDP_V4_FLOW: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case UDP_V6_FLOW: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; case AH_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_AH; + return MLX5_TT_IPV4_IPSEC_AH; case AH_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_AH; + return MLX5_TT_IPV6_IPSEC_AH; case ESP_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; case ESP_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; case IPV4_FLOW: - return MLX5E_TT_IPV4; + return MLX5_TT_IPV4; case IPV6_FLOW: - return MLX5E_TT_IPV6; + return MLX5_TT_IPV6; default: - return MLX5E_NUM_INDIR_TIRS; + return -EINVAL; } } static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - enum mlx5e_traffic_types tt; u8 rx_hash_field = 0; - void *in; + int err; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; /* RSS does not support anything other than hashing to queues * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest @@ -848,35 +848,24 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, if (nfc->data & RXH_L4_B_2_3) rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - mutex_lock(&priv->state_lock); - - if (rx_hash_field == priv->rss_params.rx_hash_fields[tt]) - goto out; - - priv->rss_params.rx_hash_fields[tt] = rx_hash_field; - mlx5e_modify_tirs_hash(priv, in); - -out: + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field); mutex_unlock(&priv->state_lock); - kvfree(in); - return 0; + + return err; } static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - enum mlx5e_traffic_types tt; u32 hash_field = 0; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; - hash_field = priv->rss_params.rx_hash_fields[tt]; + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt); nfc->data = 0; if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 37c440837945..25a0b5f0984a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2194,400 +2194,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } -static int -mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; 
- int inlen; - int err; - u32 *in; - int i; - - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - - for (i = 0; i < sz; i++) - MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); - - err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); - if (!err) - rqt->enabled = true; - - kvfree(in); - return err; -} - -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) -{ - rqt->enabled = false; - mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); -} - -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) -{ - struct mlx5e_rqt *rqt = &priv->indir_rqt; - int err; - - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); - if (err) - mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); - return err; -} - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) -{ - int err; - int ix; - - for (ix = 0; ix < n; ix++) { - err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); - if (unlikely(err)) - goto err_destroy_rqts; - } - - return 0; - -err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); - for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &tirs[ix].rqt); - - return err; -} - -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) -{ - int i; - - for (i = 0; i < n; i++) - mlx5e_destroy_rqt(priv, &tirs[i].rqt); -} - -static int mlx5e_rx_hash_fn(int hfunc) -{ - return (hfunc == ETH_RSS_HASH_TOP) ? - MLX5_RX_HASH_FN_TOEPLITZ : - MLX5_RX_HASH_FN_INVERTED_XOR8; -} - -int mlx5e_bits_invert(unsigned long a, int size) -{ - int inv = 0; - int i; - - for (i = 0; i < size; i++) - inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; - - return inv; -} - -static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, - struct mlx5e_redirect_rqt_param rrp, void *rqtc) -{ - int i; - - for (i = 0; i < sz; i++) { - u32 rqn; - - if (rrp.is_rss) { - int ix = i; - - if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, ilog2(sz)); - - ix = priv->rss_params.indirection_rqt[ix]; - rqn = rrp.rss.channels->c[ix]->rq.rqn; - } else { - rqn = rrp.rqn; - } - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } -} - -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, - struct mlx5e_redirect_rqt_param rrp) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - int inlen; - u32 *in; - int err; - - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); - err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); - - kvfree(in); - return err; -} - -static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, - struct mlx5e_redirect_rqt_param rrp) -{ - if (!rrp.is_rss) - return rrp.rqn; - - if (ix >= rrp.rss.channels->num) - return priv->drop_rq.rqn; - - return rrp.rss.channels->c[ix]->rq.rqn; -} - -static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp, - struct mlx5e_redirect_rqt_param *ptp_rrp) -{ - u32 rqtn; - int ix; - - if (priv->indir_rqt.enabled) { - /* RSS RQ table */ - rqtn = priv->indir_rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); - } - - for (ix = 0; ix < priv->max_nch; ix++) { - struct mlx5e_redirect_rqt_param direct_rrp = { - .is_rss = false, - { - .rqn = mlx5e_get_direct_rqn(priv, ix, rrp) - }, - }; - - /* Direct RQ Tables */ - if (!priv->direct_tir[ix].rqt.enabled) - continue; - - rqtn = priv->direct_tir[ix].rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); - } - if (ptp_rrp) { - rqtn = priv->ptp_tir.rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); - } -} - -static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *chs) -{ - bool rx_ptp_support = priv->profile->rx_ptp_support; - struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .channels = chs, - .hfunc = priv->rss_params.hfunc, - } - }, - }; - struct mlx5e_redirect_rqt_param ptp_rrp; - - if (rx_ptp_support) { - u32 ptp_rqn; - - ptp_rrp.is_rss = false; - ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? - priv->drop_rq.rqn : ptp_rqn; - ptp_rrp_p = &ptp_rrp; - } - mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); -} - -static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) -{ - bool rx_ptp_support = priv->profile->rx_ptp_support; - struct mlx5e_redirect_rqt_param drop_rrp = { - .is_rss = false, - { - .rqn = priv->drop_rq.rqn, - }, - }; - - mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? 
&drop_rrp : NULL); -} - -static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { - [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, - [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, -}; - -struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt) -{ - return tirc_default_config[tt]; -} - -static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) -{ - if (!params->lro_en) - return; - -#define ROUGH_MAX_L2_L3_HDR_SZ 256 - - MLX5_SET(tirc, tirc, lro_enable_mask, - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); - MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); -} - -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, - const struct mlx5e_tirc_config *ttconfig, - void *tirc, bool inner) -{ - void *hfso = inner ? 
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : - MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - - MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc)); - if (rss_params->hfunc == ETH_RSS_HASH_TOP) { - void *rss_key = MLX5_ADDR_OF(tirc, tirc, - rx_hash_toeplitz_key); - size_t len = MLX5_FLD_SZ_BYTES(tirc, - rx_hash_toeplitz_key); - - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, rss_params->toeplitz_hash_key, len); - } - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - ttconfig->l3_prot_type); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - ttconfig->l4_prot_type); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - ttconfig->rx_hash_fields); -} - -static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig, - enum mlx5e_traffic_types tt, - u32 rx_hash_fields) -{ - *ttconfig = tirc_default_config[tt]; - ttconfig->rx_hash_fields = rx_hash_fields; -} - -void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in) -{ - void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - struct mlx5e_rss_params *rss = &priv->rss_params; - struct mlx5_core_dev *mdev = priv->mdev; - int ctxlen = MLX5_ST_SZ_BYTES(tirc); - struct mlx5e_tirc_config ttconfig; - int tt; - - MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_update_rx_hash_fields(&ttconfig, tt, - rss->rx_hash_fields[tt]); - mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false); - mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in); - } - - /* Verify inner tirs resources allocated */ - if (!priv->inner_indir_tir[0].tirn) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_update_rx_hash_fields(&ttconfig, tt, - rss->rx_hash_fields[tt]); - mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true); - mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in); - } -} - static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - - void *in; - void *tirc; - int inlen; - int err; - int tt; - int ix; - - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - MLX5_SET(modify_tir_in, in, bitmask.lro, 1); - tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in); - if (err) - goto free_in; - } + struct mlx5e_rx_res *res = priv->rx_res; + struct mlx5e_lro_param lro_param; - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in); - if (err) - goto free_in; - } + lro_param = mlx5e_get_lro_param(&priv->channels.params); -free_in: - kvfree(in); - - return err; + return mlx5e_rx_res_lro_set_param(res, &lro_param); } static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); @@ -2768,9 +2382,9 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv) mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); + /* This function may be called on attach, before priv->rx_res is created. 
*/ + if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) + mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); return 0; } @@ -2829,16 +2443,15 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); - mlx5e_redirect_rqts_to_channels(priv, &priv->channels); - mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); + if (priv->rx_res) + mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); - - mlx5e_redirect_rqts_to_drop(priv); + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); @@ -3213,161 +2826,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, - u32 rqtn, u32 *tirc) -{ - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, rqtn); - MLX5_SET(tirc, tirc, tunneled_offload_en, - priv->channels.params.tunneled_offload_en); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); -} - -static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, - &tirc_default_config[tt], tirc, false); -} - -static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); -} - -static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, - &tirc_default_config[tt], tirc, true); -} - -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) -{ - struct mlx5e_tir *tir; - void *tirc; - int inlen; - int i = 0; - int err; - u32 *in; - int tt; - - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(in, 0, inlen); - tir = &priv->indir_tir[tt]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_indir_tir_ctx(priv, tt, tirc); - err = mlx5e_create_tir(priv->mdev, tir, in); - if (err) { - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); - goto err_destroy_inner_tirs; - } - } - - if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - goto out; - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { - memset(in, 0, inlen); - tir = &priv->inner_indir_tir[i]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); - err = mlx5e_create_tir(priv->mdev, tir, in); - if (err) { - mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); - goto err_destroy_inner_tirs; - } - } - -out: - kvfree(in); - - return 0; - -err_destroy_inner_tirs: - for (i--; i >= 0; i--) - mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); - - for (tt--; tt >= 0; tt--) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); - - kvfree(in); - - return err; -} - -int 
mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) -{ - struct mlx5e_tir *tir; - void *tirc; - int inlen; - int err = 0; - u32 *in; - int ix; - - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - for (ix = 0; ix < n; ix++) { - memset(in, 0, inlen); - tir = &tirs[ix]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); - err = mlx5e_create_tir(priv->mdev, tir, in); - if (unlikely(err)) - goto err_destroy_ch_tirs; - } - - goto out; - -err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); - for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &tirs[ix]); - -out: - kvfree(in); - - return err; -} - -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) -{ - int i; - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); - - /* Verify inner tirs resources allocated */ - if (!priv->inner_indir_tir[0].tirn) - return; - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); -} - -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) -{ - int i; - - for (i = 0; i < n; i++) - mlx5e_destroy_tir(priv->mdev, &tirs[i]); -} - static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; @@ -4591,7 +4049,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_set_features = mlx5e_set_features, .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_nic_mtu, - .ndo_do_ioctl = mlx5e_ioctl, + .ndo_eth_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, .ndo_tx_timeout = mlx5e_tx_timeout, @@ -4620,15 +4078,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_devlink_port = mlx5e_get_devlink_port, }; -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels) -{ - int i; - - for (i = 0; i < len; i++) - indirection_rqt[i] = i % num_channels; -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -4641,24 +4090,8 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels) -{ - enum mlx5e_traffic_types tt; - - rss_params->hfunc = ETH_RSS_HASH_TOP; - netdev_rss_key_fill(rss_params->toeplitz_hash_key, - sizeof(rss_params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, - MLX5E_INDIR_RQT_SIZE, num_channels); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - rss_params->rx_hash_fields[tt] = - tirc_default_config[tt].rx_hash_fields; -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { - struct mlx5e_rss_params *rss_params = &priv->rss_params; struct mlx5e_params *params = &priv->channels.params; struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; @@ -4718,10 +4151,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); - /* RSS */ - mlx5e_build_rss_params(rss_params, params->num_channels); - params->tunneled_offload_en = - mlx5e_tunnel_inner_ft_supported(mdev); + params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev); /* AF_XDP */ params->xsk = xsk; @@ -4781,8
+4211,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) { int tt; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt))) + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) return true; } return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); @@ -4821,7 +4251,14 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; + /* Tunneled LRO is not supported in the driver, and the same RQs are + * shared between inner and outer TIRs, so the driver can't disable LRO + * for inner TIRs while having it enabled for outer TIRs. Due to this, + * block LRO altogether if the firmware declares tunneled LRO support. + */ if (!!MLX5_CAP_ETH(mdev, lro_cap) && + !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && + !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && mlx5e_check_fragmented_striding_rq_cap(mdev)) netdev->vlan_features |= NETIF_F_LRO; @@ -4948,7 +4385,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct devlink_port *dl_port; int err; mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); @@ -4964,19 +4400,13 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); - dl_port = mlx5e_devlink_get_dl_port(priv); - if (dl_port->registered) - mlx5e_health_create_reporters(priv); - + mlx5e_health_create_reporters(priv); return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); - - if (dl_port->registered) - mlx5e_health_destroy_reporters(priv); + mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@ -4984,9 +4414,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; + enum mlx5e_rx_res_features features; + struct mlx5e_lro_param lro_param; int err; + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) + return -ENOMEM; + mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -4995,42 +4430,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; + if (priv->channels.params.tunneled_offload_en) + features |= MLX5E_RX_RES_FEATURE_INNER_FT; + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, true); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_tirs; - - err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); - if (unlikely(err)) - goto err_destroy_direct_tirs; - - err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); - if (unlikely(err)) - goto err_destroy_xsk_rqts; - - err = 
mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); - if (err) - goto err_destroy_xsk_tirs; - - err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); - if (err) - goto err_destroy_ptp_rqt; - err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_ptp_direct_tir; + goto err_destroy_rx_res; } err = mlx5e_tc_nic_init(priv); @@ -5051,46 +4464,27 @@ err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); -err_destroy_ptp_direct_tir: - mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); -err_destroy_ptp_rqt: - mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); -err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); -err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); -err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); - mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; } static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index bf94bcb6fa5d..1e520640f7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -585,9 +585,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->tunneled_offload_en = false; mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); - - /* RSS */ - mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } static void mlx5e_build_rep_netdev(struct net_device *netdev, @@ -651,25 +648,23 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); /* The inner_ttc in the ttc params is intentionally not set */ - ttc_params.any_tt_tirn = priv->direct_tir[0].tirn; - mlx5e_set_ttc_ft_params(&ttc_params); + mlx5e_set_ttc_params(priv, &ttc_params, false); if (rep->vport != MLX5_VPORT_UPLINK) /* To give uplink rep TTC a lower level for chaining from root ft */ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS;
tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) { + err = PTR_ERR(priv->fs.ttc); + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", + err); return err; } return 0; @@ -687,7 +682,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) /* non uplink reps will skip any bypass tables and go directly to * their own ttc */ - rpriv->root_ft = priv->fs.ttc.ft.t; + rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc); return 0; } @@ -760,9 +755,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; + struct mlx5e_lro_param lro_param; int err; + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) + return -ENOMEM; + mlx5e_init_l2_addr(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -771,25 +770,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, false); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5e_create_rep_ttc_table(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; err = mlx5e_create_rep_root_ft(priv); if (err) @@ -806,33 +796,26 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_root_ft: mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); -err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; } static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d273758255c3..349a93e0213d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -34,19 +34,13 @@ #include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> -#include <net/tc_act/tc_gact.h> -#include <net/tc_act/tc_skbedit.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/completion.h> -#include <net/tc_act/tc_mirred.h> -#include <net/tc_act/tc_vlan.h> -#include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/tc_act/tc_mpls.h> #include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> @@ -340,12 +334,12 @@ struct mlx5e_hairpin { struct mlx5_core_dev *func_mdev; struct mlx5e_priv *func_priv; u32 tdn; - u32 tirn; + struct mlx5e_tir direct_tir; int num_channels; struct mlx5e_rqt indir_rqt; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table ttc; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5_ttc_table *ttc; }; struct mlx5e_hairpin_entry { @@ -482,126 +476,101 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) { - u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {}; - void *tirc; + struct mlx5e_tir_builder *builder; int err; + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); if (err) - goto alloc_tdn_err; - - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + goto out; - err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn); + mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); + err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); if (err) goto create_tir_err; - return 0; +out: + mlx5e_tir_builder_free(builder); + return err; create_tir_err: mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); -alloc_tdn_err: - return err; + + goto out; } static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) { - mlx5_core_destroy_tir(hp->func_mdev, hp->tirn); + mlx5e_tir_destroy(&hp->direct_tir); mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } -static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) -{ - struct mlx5e_priv *priv = hp->func_priv; - int i, ix, sz = MLX5E_INDIR_RQT_SIZE; - u32 *indirection_rqt, rqn; - - indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL); - if (!indirection_rqt) - return -ENOMEM; - - mlx5e_build_default_indir_rqt(indirection_rqt, sz, - hp->num_channels); - - for (i = 0; i < sz; i++) { - ix = i; - if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, ilog2(sz)); - ix = indirection_rqt[ix]; - rqn = hp->pair->rqn[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } - - kfree(indirection_rqt); - return 0; -} - static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) { - int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; struct mlx5e_priv *priv = hp->func_priv; struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - u32 *in; + struct mlx5e_rss_params_indir *indir; + int err; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + indir = kvmalloc(sizeof(*indir), GFP_KERNEL); + if (!indir) return -ENOMEM; - rqtc = 
MLX5_ADDR_OF(create_rqt_in, in, rqt_context); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - - err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); - if (err) - goto out; + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); - err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); - if (!err) - hp->indir_rqt.enabled = true; - -out: - kvfree(in); + kvfree(indir); return err; } static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; - u32 in[MLX5_ST_SZ_DW(create_tir_in)]; - int tt, i, err; - void *tirc; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + int err = 0; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + struct mlx5e_rss_params_traffic_type rss_tt; - memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + rss_tt = mlx5e_rss_get_default_tt_config(tt); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); + mlx5e_tir_builder_build_rqt(builder, hp->tdn, + mlx5e_rqt_get_rqtn(&hp->indir_rqt), + false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); - err = mlx5_core_create_tir(hp->func_mdev, in, - &hp->indir_tirn[tt]); + err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); if (err) { mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); goto err_destroy_tirs; } + + mlx5e_tir_builder_clear(builder); } - return 0; -err_destroy_tirs: - for (i = 0; i < tt; i++) - mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); +out: + mlx5e_tir_builder_free(builder); return err; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); + + goto out; } static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) @@ -609,7 +578,7 @@ static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) int tt; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); + mlx5e_tir_destroy(&hp->indir_tir[tt]); } static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, @@ -620,12 +589,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->any_tt_tirn = hp->tirn; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? 
+ mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; ft_attr->prio = MLX5E_TC_PRIO; } @@ -645,30 +618,31 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) goto err_create_indirect_tirs; mlx5e_hairpin_set_ttc_params(hp, &ttc_params); - err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); - if (err) + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); goto err_create_ttc_table; + } netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", - hp->num_channels, hp->ttc.ft.t->id); + hp->num_channels, + mlx5_get_ttc_flow_table(hp->ttc)->id); return 0; err_create_ttc_table: mlx5e_hairpin_destroy_indirect_tirs(hp); err_create_indirect_tirs: - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); return err; } static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) { - struct mlx5e_priv *priv = hp->func_priv; - - mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5_destroy_ttc_table(hp->ttc); mlx5e_hairpin_destroy_indirect_tirs(hp); - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); } static struct mlx5e_hairpin * @@ -903,16 +877,17 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", - hp->tirn, hp->pair->rqn[0], + mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); } else { - flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; + flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); } flow->hpe = hpe; @@ -1056,15 +1031,17 @@ err_ft_get: static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter = NULL; + struct mlx5_fc *counter; int err; + parse_attr = attr->parse_attr; + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) @@ -1384,9 +1361,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, bool vf_tun = false, encap_valid = true; struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_fc *counter; u32 max_prio, max_chain; int err = 0; int out_index; @@ -3352,10 +3329,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv, static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; @@ -3371,8 +3348,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; nic_attr = attr->nic_attr; -
nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -3381,10 +3358,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - if (MLX5_CAP_FLOWTABLE(priv->mdev, - flow_table_properties_nic_receive.flow_counter)) - action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: @@ -3425,7 +3400,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, "device is not on same HW, can't offload"); netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", peer_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -3435,7 +3410,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, "Bad flow mark - only 16 bit is supported"); - return -EINVAL; + return -EOPNOTSUPP; } nic_attr->flow_tag = mark; @@ -3732,8 +3707,7 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack, - struct net_device *filter_dev) + struct netlink_ext_ack *extack) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3798,7 +3772,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "mpls pop supported only as first action"); return -EOPNOTSUPP; } - if (!netif_is_bareudp(filter_dev)) { + if (!netif_is_bareudp(parse_attr->filter_dev)) { NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); return -EOPNOTSUPP; @@ -3947,7 +3921,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -4300,7 +4274,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@ -4446,11 +4420,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); if (err) goto err_free; - err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + err = mlx5e_tc_add_nic_flow(priv, flow, extack); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 011e766e4f67..feecf44994a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3083,8 +3083,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) { + err = 0; goto out; + } + fallthrough; case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c 
b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 7d7ed025db0d..67571e5040d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -50,7 +50,7 @@ static const struct net_device_ops mlx5i_netdev_ops = { .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, .ndo_change_mtu = mlx5i_change_mtu, - .ndo_do_ioctl = mlx5i_ioctl, + .ndo_eth_ioctl = mlx5i_ioctl, }; /* IPoIB mlx5 netdev profile */ @@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -330,33 +329,15 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; - } - - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_inner_ttc_table; + goto err_destroy_arfs_tables; } return 0; -err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -365,17 +346,20 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; + struct mlx5e_lro_param lro_param; int err; + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) + return -ENOMEM; + mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -384,54 +368,38 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, true); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5i_create_flow_steering(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; return 0; -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); 
-err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; } /* The stats groups order is opposite to the update_stats() order calls */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 18ee21b06a00..5308f23702bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -149,7 +149,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = { .ndo_get_stats64 = mlx5i_get_stats, .ndo_uninit = mlx5i_pkey_dev_cleanup, .ndo_change_mtu = mlx5i_pkey_change_mtu, - .ndo_do_ioctl = mlx5i_pkey_ioctl, + .ndo_eth_ioctl = mlx5i_pkey_ioctl, }; /* Child NDOs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c new file mode 100644 index 000000000000..749d17c0057d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. 
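[Editor's note] The new lib/fs_ttc.c below lifts the TTC (traffic type classifier) tables out of the mlx5e Ethernet driver and into the shared mlx5 core library; this is what lets the nic, representor, hairpin and IPoIB paths earlier in the series converge on one API. A rough consumer-side sketch of that contract, pieced together from the callers shown above (mlx5e_create_ttc_table() and mlx5e_hairpin_set_ttc_params()); example_tirn[] is a hypothetical placeholder, not a real driver symbol:

	struct ttc_params params = {};
	struct mlx5_ttc_table *ttc;
	int tt;

	/* One destination per traffic type; the library creates the flow
	 * table, groups and steering rules internally.
	 */
	params.ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL);
	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		params.dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		params.dests[tt].tir_num = example_tirn[tt]; /* hypothetical TIRs */
	}

	ttc = mlx5_create_ttc_table(mdev, &params); /* ERR_PTR on failure */
	if (IS_ERR(ttc))
		return PTR_ERR(ttc);

	/* Other tables chain to it through the opaque handle: */
	dest.ft = mlx5_get_ttc_flow_table(ttc);
	...
	mlx5_destroy_ttc_table(ttc);

The opaque struct mlx5_ttc_table replaces the old pattern of embedding struct mlx5e_ttc_table in priv->fs and dereferencing ttc->ft.t directly.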
+ +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/fs_ttc.h" + +#define MLX5_TTC_NUM_GROUPS 3 +#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) +#define MLX5_TTC_GROUP2_SIZE BIT(1) +#define MLX5_TTC_GROUP3_SIZE BIT(0) +#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ + MLX5_TTC_GROUP2_SIZE +\ + MLX5_TTC_GROUP3_SIZE) + +#define MLX5_INNER_TTC_NUM_GROUPS 3 +#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ + MLX5_INNER_TTC_GROUP2_SIZE +\ + MLX5_INNER_TTC_GROUP3_SIZE) + +/* L3/L4 traffic type classifier */ +struct mlx5_ttc_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; + struct mlx5_ttc_rule rules[MLX5_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) +{ + return ttc->t; +} + +static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } + } + + for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5_etype_proto ttc_rules[] = { + [MLX5_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_etype_proto ttc_tunnel_rules[] = { + [MLX5_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + +}; + +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) +{ + return ttc_tunnel_rules[tt].proto; +} + +static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, + u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); + default: + return false; + } +} + +static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for 
(tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5_tunnel_proto_supported_rx(mdev, + ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_tunnel_any_rx_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version)); +} + +static u8 mlx5_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, u16 etype, u8 proto) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? 
ERR_PTR(err) : rule; +} + +static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_flow_handle **trules; + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->t; + rules = ttc->rules; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev)) + return 0; + + trules = ttc->tunnel_rules; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (!mlx5_tunnel_proto_supported_rx(dev, + ttc_tunnel_rules[tt].proto)) + continue; + trules[tt] = mlx5_generate_ttc_rule(dev, ft, + &params->tunnel_dests[tt], + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(trules[tt])) { + err = PTR_ERR(trules[tt]); + trules[tt] = NULL; + goto del_rules; + } + } + + return 0; + +del_rules: + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; +
+ +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->t; + rules = ttc->rules; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, + &params->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + return 0; + +del_rules: + + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), + GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +}
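mlx5_create_inner_ttc_table() below ties the inner groups and rule generators together. For orientation, a hedged sketch of how a consumer might drive the resulting API; example_setup_ttc() is a hypothetical helper, and it assumes the caller already owns a flow namespace plus one destination per traffic type (tunnel destinations elided):

static int example_setup_ttc(struct mlx5_core_dev *mdev,
			     struct mlx5_flow_namespace *ns,
			     struct mlx5_flow_destination *dests)
{
	struct ttc_params params = {};
	struct mlx5_ttc_table *ttc;
	int tt;

	params.ns = ns;
	for (tt = 0; tt < MLX5_NUM_TT; tt++)
		params.dests[tt] = dests[tt];
	/* tunnel_dests[] must also be filled when inner_ttc is set */
	params.inner_ttc = mlx5_tunnel_inner_ft_supported(mdev);

	ttc = mlx5_create_ttc_table(mdev, &params);
	if (IS_ERR(ttc))
		return PTR_ERR(ttc);

	/* ... steer traffic via mlx5_get_ttc_flow_table(ttc); eventually: */
	mlx5_destroy_ttc_table(ttc);
	return 0;
}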
+ +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_inner_ttc_table_groups(ttc); + if (err) + goto destroy_ft; + + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) +{ + int i; + + mlx5_cleanup_ttc_rules(ttc); + for (i = ttc->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ttc->g[i])) + mlx5_destroy_flow_group(ttc->g[i]); + ttc->g[i] = NULL; + } + + kfree(ttc->g); + mlx5_destroy_flow_table(ttc->t); + kvfree(ttc); +} + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + bool match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto destroy_ft; + + err = mlx5_generate_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest) +{ + return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest, + NULL); +} + +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest; + + WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, + "TTC[%d] default dest is not setup yet", type); + + return *dest; +} + +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type); + + return mlx5_ttc_fwd_dest(ttc, type, &dest); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h new file mode 100644 index 000000000000..ce95be8f8382 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies.
*/ + +#ifndef __ML5_FS_TTC_H__ +#define __ML5_FS_TTC_H__ + +#include <linux/mlx5/fs.h> + +enum mlx5_traffic_types { + MLX5_TT_IPV4_TCP, + MLX5_TT_IPV6_TCP, + MLX5_TT_IPV4_UDP, + MLX5_TT_IPV6_UDP, + MLX5_TT_IPV4_IPSEC_AH, + MLX5_TT_IPV6_IPSEC_AH, + MLX5_TT_IPV4_IPSEC_ESP, + MLX5_TT_IPV6_IPSEC_ESP, + MLX5_TT_IPV4, + MLX5_TT_IPV6, + MLX5_TT_ANY, + MLX5_NUM_TT, + MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, +}; + +enum mlx5_tunnel_types { + MLX5_TT_IPV4_GRE, + MLX5_TT_IPV6_GRE, + MLX5_TT_IPV4_IPIP, + MLX5_TT_IPV6_IPIP, + MLX5_TT_IPV4_IPV6, + MLX5_TT_IPV6_IPV6, + MLX5_NUM_TUNNEL_TT, +}; + +struct mlx5_ttc_rule { + struct mlx5_flow_handle *rule; + struct mlx5_flow_destination default_dest; +}; + +struct mlx5_ttc_table; + +struct ttc_params { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table_attr ft_attr; + struct mlx5_flow_destination dests[MLX5_NUM_TT]; + bool inner_ttc; + struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest); +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); + +#endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index a0a059e0154f..d22219613719 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -199,7 +199,7 @@ static int mlxbf_gige_stop(struct net_device *netdev) return 0; } -static int mlxbf_gige_do_ioctl(struct net_device *netdev, +static int mlxbf_gige_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { if (!(netif_running(netdev))) @@ -253,7 +253,7 @@ static const struct net_device_ops mlxbf_gige_netdev_ops = { .ndo_start_xmit = mlxbf_gige_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mlxbf_gige_do_ioctl, + .ndo_eth_ioctl = mlxbf_gige_eth_ioctl, .ndo_set_rx_mode = mlxbf_gige_set_rx_mode, .ndo_get_stats64 = mlxbf_gige_get_stats64, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 88699e678544..081408e892d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1207,7 +1207,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, - .ndo_do_ioctl = mlxsw_sp_port_ioctl, + .ndo_eth_ioctl = mlxsw_sp_port_ioctl, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c5ef9aa64efe..f5d0d392efbf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -335,14 +335,16 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, - struct net_device *brport_dev) + struct net_device *brport_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_port *mlxsw_sp_port; + int err; bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL); if (!bridge_port) - return NULL; + return ERR_PTR(-ENOMEM); mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev); bridge_port->lagged = mlxsw_sp_port->lagged; @@ -359,12 +361,23 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, list_add(&bridge_port->list, &bridge_device->ports_list); bridge_port->ref_count = 1; + err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev, + NULL, NULL, NULL, false, extack); + if (err) + goto err_switchdev_offload; + return bridge_port; + +err_switchdev_offload: + list_del(&bridge_port->list); + kfree(bridge_port); + return ERR_PTR(err); } static void mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) { + switchdev_bridge_port_unoffload(bridge_port->dev, NULL, NULL, NULL); list_del(&bridge_port->list); WARN_ON(!list_empty(&bridge_port->vlans_list)); kfree(bridge_port); @@ -390,9 +403,10 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, if (IS_ERR(bridge_device)) return ERR_CAST(bridge_device); - bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev); - if (!bridge_port) { - err = -ENOMEM; + bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev, + extack); + if (IS_ERR(bridge_port)) { + err = PTR_ERR(bridge_port); goto err_bridge_port_create; } @@ -1569,7 +1583,6 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, { long *flood_bitmap; int num_of_ports; - int alloc_size; u16 mid_idx; int err; @@ -1579,18 +1592,17 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, return false; num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); - alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports); - flood_bitmap = kzalloc(alloc_size, GFP_KERNEL); + flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL); if (!flood_bitmap) return false; - bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); + bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); mid->mid = mid_idx; err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, bridge_device->mrouter); - kfree(flood_bitmap); + bitmap_free(flood_bitmap); if (err) return false; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 831518466de2..3f69bb59ba49 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -689,7 +689,7 @@ static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd) static const struct net_device_ops ks8851_netdev_ops = { .ndo_open = ks8851_net_open, .ndo_stop = ks8851_net_stop, - .ndo_do_ioctl = ks8851_net_ioctl, + .ndo_eth_ioctl = ks8851_net_ioctl, .ndo_start_xmit = ks8851_start_xmit, .ndo_set_mac_address = ks8851_set_mac_address, .ndo_set_rx_mode = ks8851_set_rx_mode, diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 7945eb5e2fe8..a0ee155f9f51 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ 
-6738,7 +6738,7 @@ static const struct net_device_ops netdev_ops = { .ndo_set_features = netdev_set_features, .ndo_set_mac_address = netdev_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_rx_mode = netdev_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = netdev_netpoll, diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index dae10328c6cf..9e8561cdc32a 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2655,7 +2655,7 @@ static const struct net_device_ops lan743x_netdev_ops = { .ndo_open = lan743x_netdev_open, .ndo_stop = lan743x_netdev_close, .ndo_start_xmit = lan743x_netdev_xmit_frame, - .ndo_do_ioctl = lan743x_netdev_ioctl, + .ndo_eth_ioctl = lan743x_netdev_ioctl, .ndo_set_rx_mode = lan743x_netdev_set_multicast, .ndo_change_mtu = lan743x_netdev_change_mtu, .ndo_get_stats64 = lan743x_netdev_get_stats64, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index a72e3b3b596e..649ca609884a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -93,9 +93,12 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx, } static int sparx5_port_bridge_join(struct sparx5_port *port, - struct net_device *bridge) + struct net_device *bridge, + struct netlink_ext_ack *extack) { struct sparx5 *sparx5 = port->sparx5; + struct net_device *ndev = port->ndev; + int err; if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) /* First bridged port */ @@ -109,12 +112,21 @@ static int sparx5_port_bridge_join(struct sparx5_port *port, set_bit(port->portno, sparx5->bridge_mask); + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + goto err_switchdev_offload; + /* Port enters in bridge mode therefor don't need to copy to CPU * frames for multicast in case the bridge is not requesting them */ - __dev_mc_unsync(port->ndev, sparx5_mc_unsync); + __dev_mc_unsync(ndev, sparx5_mc_unsync); return 0; + +err_switchdev_offload: + clear_bit(port->portno, sparx5->bridge_mask); + return err; } static void sparx5_port_bridge_leave(struct sparx5_port *port, @@ -122,6 +134,8 @@ static void sparx5_port_bridge_leave(struct sparx5_port *port, { struct sparx5 *sparx5 = port->sparx5; + switchdev_bridge_port_unoffload(port->ndev, NULL, NULL, NULL); + clear_bit(port->portno, sparx5->bridge_mask); if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) sparx5->hw_bridge_dev = NULL; @@ -139,11 +153,15 @@ static int sparx5_port_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct sparx5_port *port = netdev_priv(dev); + struct netlink_ext_ack *extack; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) - err = sparx5_port_bridge_join(port, info->upper_dev); + err = sparx5_port_bridge_join(port, info->upper_dev, + extack); else sparx5_port_bridge_leave(port, info->upper_dev); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e9d260d84bf3..de900ea70fd4 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -823,7 +823,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = { 
.ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, .ndo_setup_tc = ocelot_setup_tc, - .ndo_do_ioctl = ocelot_ioctl, + .ndo_eth_ioctl = ocelot_ioctl, .ndo_get_devlink_port = ocelot_get_devlink_port, }; @@ -1154,38 +1154,19 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port, struct net_device *bridge_dev, struct netlink_ext_ack *extack) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_port_private *priv; clock_t ageing_time; u8 stp_state; - int err; - - priv = container_of(ocelot_port, struct ocelot_port_private, port); ocelot_inherit_brport_flags(ocelot, port, brport_dev); stp_state = br_port_get_stp_state(brport_dev); ocelot_bridge_stp_state_set(ocelot, port, stp_state); - err = ocelot_port_vlan_filtering(ocelot, port, - br_vlan_enabled(bridge_dev)); - if (err) - return err; - ageing_time = br_get_ageing_time(bridge_dev); ocelot_port_attr_ageing_set(ocelot, port, ageing_time); - err = br_mdb_replay(bridge_dev, brport_dev, priv, true, - &ocelot_switchdev_blocking_nb, extack); - if (err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(bridge_dev, brport_dev, priv, true, - &ocelot_switchdev_blocking_nb, extack); - if (err && err != -EOPNOTSUPP) - return err; - - return 0; + return ocelot_port_vlan_filtering(ocelot, port, + br_vlan_enabled(bridge_dev)); } static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port) @@ -1216,6 +1197,13 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, ocelot_port_bridge_join(ocelot, port, bridge); + err = switchdev_bridge_port_offload(brport_dev, dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb, + false, extack); + if (err) + goto err_switchdev_offload; + err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack); if (err) goto err_switchdev_sync; @@ -1223,10 +1211,24 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, return 0; err_switchdev_sync: + switchdev_bridge_port_unoffload(brport_dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb); +err_switchdev_offload: ocelot_port_bridge_leave(ocelot, port, bridge); return err; } +static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev, + struct net_device *brport_dev) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + + switchdev_bridge_port_unoffload(brport_dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb); +} + static int ocelot_netdevice_bridge_leave(struct net_device *dev, struct net_device *brport_dev, struct net_device *bridge) @@ -1279,6 +1281,18 @@ err_bridge_join: return err; } +static void ocelot_netdevice_pre_lag_leave(struct net_device *dev, + struct net_device *bond) +{ + struct net_device *bridge_dev; + + bridge_dev = netdev_master_upper_dev_get(bond); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return; + + ocelot_netdevice_pre_bridge_leave(dev, bond); +} + static int ocelot_netdevice_lag_leave(struct net_device *dev, struct net_device *bond) { @@ -1356,6 +1370,43 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev, } static int +ocelot_netdevice_prechangeupper(struct net_device *dev, + struct net_device *brport_dev, + struct netdev_notifier_changeupper_info *info) +{ + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + ocelot_netdevice_pre_bridge_leave(dev, brport_dev); + + if (netif_is_lag_master(info->upper_dev) && !info->linking) + ocelot_netdevice_pre_lag_leave(dev, info->upper_dev); + + return NOTIFY_DONE; +} + +static int 
+ocelot_netdevice_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct ocelot_port_private *priv = netdev_priv(lower); + struct ocelot_port *ocelot_port = &priv->port; + + if (ocelot_port->bond != dev) + return NOTIFY_OK; + + err = ocelot_netdevice_prechangeupper(dev, lower, info); + if (err) + return err; + } + + return NOTIFY_DONE; +} + +static int ocelot_netdevice_changelowerstate(struct net_device *dev, struct netdev_lag_lower_state_info *info) { @@ -1382,6 +1433,17 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { + case NETDEV_PRECHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; + + if (ocelot_netdevice_dev_check(dev)) + return ocelot_netdevice_prechangeupper(dev, dev, info); + + if (netif_is_lag_master(dev)) + return ocelot_netdevice_lag_prechangeupper(dev, info); + + break; + } case NETDEV_CHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index ce3eca5d152b..d74a80f010c5 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); platform_set_drvdata(pdev, dev); - netdev_boot_setup_check(dev); - dev->base_addr = res->start; dev->irq = platform_get_irq(pdev, 0); err = sonic_probe1(dev); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 84f7dbe9edff..3f982033944b 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -790,7 +790,7 @@ static const struct net_device_ops natsemi_netdev_ops = { .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, .ndo_change_mtu = natsemi_change_mtu, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = ns_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 28d9e98db81a..ca4686094701 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev) lp->device = &pdev->dev; platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); dev->base_addr = resmem->start; dev->irq = resirq->start; diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 0b017d4f5c08..09c0e839cca5 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7625,7 +7625,7 @@ static const struct net_device_ops s2io_netdev_ops = { .ndo_start_xmit = s2io_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = s2io_ndo_set_multicast, - .ndo_do_ioctl = s2io_ioctl, + .ndo_eth_ioctl = s2io_ioctl, .ndo_set_mac_address = s2io_set_mac_addr, .ndo_change_mtu = s2io_change_mtu, .ndo_set_features = s2io_set_features, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 7abd13e69471..df4a3f3da83a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c 
@@ -3339,7 +3339,7 @@ static const struct net_device_ops vxge_netdev_ops = { .ndo_start_xmit = vxge_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = vxge_set_multicast, - .ndo_do_ioctl = vxge_ioctl, + .ndo_eth_ioctl = vxge_ioctl, .ndo_set_mac_address = vxge_set_mac_addr, .ndo_change_mtu = vxge_change_mtu, .ndo_fix_features = vxge_fix_features, diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index b82758d5beed..8844d1ac053a 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -23,6 +23,7 @@ config NFP depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK select CRC32 + select DIMLIB help This driver supports the Netronome(R) NFP4000/NFP6000 based cards working as a advanced Ethernet NIC. It works with both diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1cbe2c9f3959..2a432de11858 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -262,10 +262,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, } static bool -nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) +nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx) { - struct flow_action_entry *act = flow->rule->action.entries; - int num_act = flow->rule->action.num_entries; + struct flow_action_entry *act = rule->action.entries; + int num_act = rule->action.num_entries; int act_idx; /* Preparse action list for next mirred or redirect action */ @@ -279,7 +279,7 @@ nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) static enum nfp_flower_tun_type nfp_fl_get_tun_from_act(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, const struct flow_action_entry *act, int act_idx) { const struct ip_tunnel_info *tun = act->tunnel; @@ -288,7 +288,7 @@ nfp_fl_get_tun_from_act(struct nfp_app *app, /* Determine the tunnel type based on the egress netdev * in the mirred action for tunnels without l4. 
*/ - if (nfp_flower_tun_is_gre(flow, act_idx)) + if (nfp_flower_tun_is_gre(rule, act_idx)) return NFP_FL_TUNNEL_GRE; switch (tun->key.tp_dst) { @@ -788,11 +788,10 @@ struct nfp_flower_pedit_acts { }; static int -nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, +nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action, int *a_len, struct nfp_flower_pedit_acts *set_act, u32 *csum_updated) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); size_t act_size = 0; u8 ip_proto = 0; @@ -890,7 +889,7 @@ nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, static int nfp_fl_pedit(const struct flow_action_entry *act, - struct flow_cls_offload *flow, char *nfp_action, int *a_len, + char *nfp_action, int *a_len, u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, struct netlink_ext_ack *extack) { @@ -977,7 +976,7 @@ nfp_flower_output_action(struct nfp_app *app, static int nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, @@ -1045,7 +1044,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, case FLOW_ACTION_TUNNEL_ENCAP: { const struct ip_tunnel_info *ip_tun = act->tunnel; - *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx); + *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx); if (*tun_type == NFP_FL_TUNNEL_NONE) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); return -EOPNOTSUPP; @@ -1086,7 +1085,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, /* Tunnel decap is handled by default so accept action. 
*/ return 0; case FLOW_ACTION_MANGLE: - if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len], + if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len], a_len, csum_updated, set_act, extack)) return -EOPNOTSUPP; break; @@ -1195,7 +1194,7 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, } int nfp_flower_compile_action(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, struct netlink_ext_ack *extack) @@ -1207,7 +1206,7 @@ int nfp_flower_compile_action(struct nfp_app *app, bool pkt_host = false; u32 csum_updated = 0; - if (!flow_action_hw_stats_check(&flow->rule->action, extack, + if (!flow_action_hw_stats_check(&rule->action, extack, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; @@ -1219,18 +1218,18 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - flow_action_for_each(i, act, &flow->rule->action) { - if (nfp_fl_check_mangle_start(&flow->rule->action, i)) + flow_action_for_each(i, act, &rule->action) { + if (nfp_fl_check_mangle_start(&rule->action, i)) memset(&set_act, 0, sizeof(set_act)); - err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len, + err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated, &set_act, &pkt_host, extack, i); if (err) return err; act_cnt++; - if (nfp_fl_check_mangle_end(&flow->rule->action, i)) - nfp_fl_commit_mangle(flow, + if (nfp_fl_check_mangle_end(&rule->action, i)) + nfp_fl_commit_mangle(rule, &nfp_flow->action_data[act_len], &act_len, &set_act, &csum_updated); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 062bb2db68bf..bfd7d1c35076 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -2,6 +2,7 @@ /* Copyright (C) 2021 Corigine, Inc. 
*/ #include "conntrack.h" +#include "../nfp_port.h" const struct rhashtable_params nfp_tc_ct_merge_params = { .head_offset = offsetof(struct nfp_fl_ct_tc_merge, @@ -407,15 +408,491 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry, return -EINVAL; } +static int +nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) +{ + int key_size; + + /* This field must always be present */ + key_size = sizeof(struct nfp_flower_meta_tci); + map[FLOW_PAY_META_TCI] = 0; + + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) { + map[FLOW_PAY_EXT_META] = key_size; + key_size += sizeof(struct nfp_flower_ext_meta); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) { + map[FLOW_PAY_INPORT] = key_size; + key_size += sizeof(struct nfp_flower_in_port); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) { + map[FLOW_PAY_MAC_MPLS] = key_size; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) { + map[FLOW_PAY_L4] = key_size; + key_size += sizeof(struct nfp_flower_tp_ports); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) { + map[FLOW_PAY_IPV4] = key_size; + key_size += sizeof(struct nfp_flower_ipv4); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) { + map[FLOW_PAY_IPV6] = key_size; + key_size += sizeof(struct nfp_flower_ipv6); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + map[FLOW_PAY_GRE] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_gre_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + map[FLOW_PAY_QINQ] = key_size; + key_size += sizeof(struct nfp_flower_vlan); + } + + if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) || + (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) { + map[FLOW_PAY_UDP_TUN] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + map[FLOW_PAY_GENEVE_OPT] = key_size; + key_size += sizeof(struct nfp_flower_geneve_options); + } + + return key_size; +} + +static int nfp_fl_merge_actions_offload(struct flow_rule **rules, + struct nfp_flower_priv *priv, + struct net_device *netdev, + struct nfp_fl_payload *flow_pay) +{ + struct flow_action_entry *a_in; + int i, j, num_actions, id; + struct flow_rule *a_rule; + int err = 0, offset = 0; + + num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries + + rules[CT_TYPE_NFT]->action.num_entries + + rules[CT_TYPE_POST_CT]->action.num_entries; + + a_rule = flow_rule_alloc(num_actions); + if (!a_rule) + return -ENOMEM; + + /* Actions need a BASIC dissector. */ + a_rule->match = rules[CT_TYPE_PRE_CT]->match; + + /* Copy actions */ + for (j = 0; j < _CT_TYPE_MAX; j++) { + if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + /* ip_proto is the only field that needed in later compile_action, + * needed to set the correct checksum flags. It doesn't really matter + * which input rule's ip_proto field we take as the earlier merge checks + * would have made sure that they don't conflict. 
We do not know which + * of the subflows would have the ip_proto filled in, so we need to iterate + * through the subflows and assign the proper subflow to a_rule + */ + flow_rule_match_basic(rules[j], &match); + if (match.mask->ip_proto) + a_rule->match = rules[j]->match; + } + + for (i = 0; i < rules[j]->action.num_entries; i++) { + a_in = &rules[j]->action.entries[i]; + id = a_in->id; + + /* Ignore CT related actions as these would already have + * been taken care of by previous checks, and we do not send + * any CT actions to the firmware. + */ + switch (id) { + case FLOW_ACTION_CT: + case FLOW_ACTION_GOTO: + case FLOW_ACTION_CT_METADATA: + continue; + default: + memcpy(&a_rule->action.entries[offset++], + a_in, sizeof(struct flow_action_entry)); + break; + } + } + } + + /* Some actions would have been ignored, so update the num_entries field */ + a_rule->action.num_entries = offset; + err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL); + kfree(a_rule); + + return err; +} + static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) { - return 0; + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_fl_ct_zone_entry *zt = m_entry->zt; + struct nfp_fl_key_ls key_layer, tmp_layer; + struct nfp_flower_priv *priv = zt->priv; + u16 key_map[_FLOW_PAY_LAYERS_MAX]; + struct nfp_fl_payload *flow_pay; + + struct flow_rule *rules[_CT_TYPE_MAX]; + u8 *key, *msk, *kdata, *mdata; + struct nfp_port *port = NULL; + struct net_device *netdev; + bool qinq_sup; + u32 port_id; + u16 offset; + int i, err; + + netdev = m_entry->netdev; + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule; + rules[CT_TYPE_NFT] = m_entry->nft_parent->rule; + rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule; + + memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls)); + memset(&key_map, 0, sizeof(key_map)); + + /* Calculate the resultant key layer and size for offload */ + for (i = 0; i < _CT_TYPE_MAX; i++) { + err = nfp_flower_calculate_key_layers(priv->app, + m_entry->netdev, + &tmp_layer, rules[i], + &tun_type, NULL); + if (err) + return err; + + key_layer.key_layer |= tmp_layer.key_layer; + key_layer.key_layer_two |= tmp_layer.key_layer_two; + } + key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map); + + flow_pay = nfp_flower_allocate_new(&key_layer); + if (!flow_pay) + return -ENOMEM; + + memset(flow_pay->unmasked_data, 0, key_layer.key_size); + memset(flow_pay->mask_data, 0, key_layer.key_size); + + kdata = flow_pay->unmasked_data; + mdata = flow_pay->mask_data; + + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + key_layer.key_layer); + + if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) { + offset = key_map[FLOW_PAY_EXT_META]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key, + key_layer.key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_layer.key_layer_two); + } + + /* Using in_port from the -trk rule. 
The tc merge checks should already + * be checking that the ingress netdevs are the same + */ + port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev); + offset = key_map[FLOW_PAY_INPORT]; + key = kdata + offset; + msk = mdata + offset; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)key, + port_id, false, tun_type, NULL); + if (err) + goto ct_offload_err; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, NULL); + if (err) + goto ct_offload_err; + + /* This following part works on the assumption that previous checks has + * already filtered out flows that has different values for the different + * layers. Here we iterate through all three rules and merge their respective + * masked value(cared bits), basic method is: + * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask) + * final_mask = r1_mask | r2_mask | r3_mask + * If none of the rules contains a match that is also fine, that simply means + * that the layer is not present. + */ + if (!qinq_sup) { + for (i = 0; i < _CT_TYPE_MAX; i++) { + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) { + offset = key_map[FLOW_PAY_MAC_MPLS]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i]); + err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i], NULL); + if (err) + goto ct_offload_err; + } + } + + if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key, + (struct nfp_flower_ipv4 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV6]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key, + (struct nfp_flower_ipv6 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) { + offset = key_map[FLOW_PAY_L4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key, + (struct nfp_flower_tp_ports *)msk, + rules[i]); + } + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + offset = key_map[FLOW_PAY_GRE]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_gre_tun((void *)key, + (void *)msk, rules[i]); + } + gre_match = (struct nfp_flower_ipv6_gre_tun *)key; + dst = &gre_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_gre_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. 
+ * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + } + + if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { + offset = key_map[FLOW_PAY_QINQ]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, + (struct nfp_flower_vlan *)msk, + rules[i]); + } + } + + if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN || + key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + offset = key_map[FLOW_PAY_UDP_TUN]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_udp_tun((void *)key, + (void *)msk, rules[i]); + } + udp_match = (struct nfp_flower_ipv6_udp_tun *)key; + dst = &udp_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_udp_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + offset = key_map[FLOW_PAY_GENEVE_OPT]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) + nfp_flower_compile_geneve_opt(key, msk, rules[i]); + } + } + + /* Merge actions into flow_pay */ + err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay); + if (err) + goto ct_offload_err; + + /* Use the pointer address as the cookie, but set the last bit to 1. + * This is to avoid the 'is_merge_flow' check from detecting this as + * an already merged flow. This works since address alignment means + * that the last bit for pointer addresses will be 0. 
+ */ + flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1; + err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie, + flow_pay, netdev, NULL); + if (err) + goto ct_offload_err; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) + goto ct_release_offload_meta_err; + + err = nfp_flower_xmit_flow(priv->app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto ct_remove_rhash_err; + + m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie; + m_entry->flow_pay = flow_pay; + + if (port) + port->tc_offload_cnt++; + + return err; + +ct_remove_rhash_err: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +ct_release_offload_meta_err: + nfp_modify_flow_metadata(priv->app, flow_pay); +ct_offload_err: + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr); + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); + return err; } static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie, struct net_device *netdev) { - return 0; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flow_pay; + struct nfp_port *port = NULL; + int err = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + flow_pay = nfp_flower_search_fl_table(app, cookie, netdev); + if (!flow_pay) + return -ENOENT; + + err = nfp_modify_flow_metadata(app, flow_pay); + if (err) + goto err_free_merge_flow; + + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr); + + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + + if (!flow_pay->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, flow_pay); + if (port) + port->tc_offload_cnt--; + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); + kfree_rcu(flow_pay, rcu); + return err; } static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, @@ -1048,6 +1525,139 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, return 0; } +static void +nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, + enum ct_entry_type type, u64 *m_pkts, + u64 *m_bytes, u64 *m_used) +{ + struct nfp_flower_priv *priv = nft_merge->zt->priv; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + nfp_flow = nft_merge->flow_pay; + if (!nfp_flow) + return; + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + *m_pkts += priv->stats[ctx_id].pkts; + *m_bytes += priv->stats[ctx_id].bytes; + *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used); + + /* If request is for a sub_flow which is part of a tunnel merged + * flow then update stats from tunnel merged flows first. 
+ */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(priv->app, nfp_flow); + + if (type != CT_TYPE_NFT) { + /* Update nft cached stats */ + flow_stats_update(&nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } else { + /* Update pre_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Update post_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + /* Reset stats from the nfp */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; +} + +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry; + struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp; + struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp; + + u64 pkts = 0, bytes = 0, used = 0; + u64 m_pkts, m_bytes, m_used; + + spin_lock_bh(&ct_entry->zt->priv->stats_lock); + + if (ct_entry->type == CT_TYPE_PRE_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + pre_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update post_ct partner */ + flow_stats_update(&tc_merge->post_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else if (ct_entry->type == CT_TYPE_POST_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + post_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update pre_ct partner */ + flow_stats_update(&tc_merge->pre_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else { + /* Iterate nft_merge entries associated with this nft flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children, + nft_flow_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT, + &pkts, &bytes, &used); + } + } + + /* Add stats from this request to stats potentially cached by + * previous requests. + */ + flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Finally update the flow stats from the original stats request */ + flow_stats_update(&flow->stats, ct_entry->stats.bytes, + ct_entry->stats.pkts, 0, + ct_entry->stats.lastused, + FLOW_ACTION_HW_STATS_DELAYED); + /* Stats has been synced to original flow, can now clear + * the cache. 
+ */ + ct_entry->stats.pkts = 0; + ct_entry->stats.bytes = 0; + spin_unlock_bh(&ct_entry->zt->priv->stats_lock); + + return 0; +} + static int nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow) { @@ -1080,7 +1690,11 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl nfp_ct_map_params); return nfp_fl_ct_del_flow(ct_map_ent); case FLOW_CLS_STATS: - return 0; + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + break; default: break; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 170b6cdb8cd0..beb6cceff9d8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -83,6 +83,24 @@ enum ct_entry_type { CT_TYPE_PRE_CT, CT_TYPE_NFT, CT_TYPE_POST_CT, + _CT_TYPE_MAX, +}; + +enum nfp_nfp_layer_name { + FLOW_PAY_META_TCI = 0, + FLOW_PAY_INPORT, + FLOW_PAY_EXT_META, + FLOW_PAY_MAC_MPLS, + FLOW_PAY_L4, + FLOW_PAY_IPV4, + FLOW_PAY_IPV6, + FLOW_PAY_CT, + FLOW_PAY_GRE, + FLOW_PAY_QINQ, + FLOW_PAY_UDP_TUN, + FLOW_PAY_GENEVE_OPT, + + _FLOW_PAY_LAYERS_MAX }; /** @@ -228,4 +246,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent); */ int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv); + +/** + * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows + * @flow: TC flower classifier offload structure. + * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 0fbd682ccf72..917c450a7aad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -413,20 +413,73 @@ int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2); +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type); +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext); +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule); +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule); +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule); +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 
*msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule); int nfp_flower_compile_flow_match(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack); int nfp_flower_compile_action(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, struct netlink_ext_ack *extack); -int nfp_compile_flow_metadata(struct nfp_app *app, - struct flow_cls_offload *flow, +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, struct nfp_fl_payload *nfp_flow, struct net_device *netdev, struct netlink_ext_ack *extack); @@ -498,4 +551,22 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, struct nfp_fl_payload *flow); int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, struct nfp_fl_payload *flow); + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer); +int nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype); +void +nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 255a4dff6288..9d86eea4dc16 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -7,51 +7,68 @@ #include "cmsg.h" #include "main.h" -static void -nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, - struct nfp_flower_meta_tci *msk, - struct flow_rule *rule, u8 key_type, bool qinq_sup) +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type) { - u16 tmp_tci; - - memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); - memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); - /* Populate the metadata frame. */ ext->nfp_flow_key_layer = key_type; ext->mask_id = ~0; msk->nfp_flow_key_layer = key_type; msk->mask_id = ~0; +} - if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule) +{ + u16 msk_tci, key_tci; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; flow_rule_match_vlan(rule, &match); /* Populate the tci field. 
*/ - tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, match.key->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, match.key->vlan_id); - ext->tci = cpu_to_be16(tmp_tci); - tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, match.mask->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, match.mask->vlan_id); - msk->tci = cpu_to_be16(tmp_tci); + + ext->tci |= cpu_to_be16((key_tci & msk_tci)); + msk->tci |= cpu_to_be16(msk_tci); } } static void +nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule, u8 key_type, bool qinq_sup) +{ + memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); + memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); + + nfp_flower_compile_meta(ext, msk, key_type); + + if (!qinq_sup) + nfp_flower_compile_tci(ext, msk, rule); +} + +void nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) { frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext); } -static int +int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, bool mask_version, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) @@ -74,28 +91,37 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, return 0; } -static int +void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, struct flow_rule *rule, - struct netlink_ext_ack *extack) + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); - memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; + int i; flow_rule_match_eth_addrs(rule, &match); /* Populate mac frame. 
*/ - ether_addr_copy(ext->mac_dst, &match.key->dst[0]); - ether_addr_copy(ext->mac_src, &match.key->src[0]); - ether_addr_copy(msk->mac_dst, &match.mask->dst[0]); - ether_addr_copy(msk->mac_src, &match.mask->src[0]); + for (i = 0; i < ETH_ALEN; i++) { + ext->mac_dst[i] |= match.key->dst[i] & + match.mask->dst[i]; + msk->mac_dst[i] |= match.mask->dst[i]; + ext->mac_src[i] |= match.key->src[i] & + match.mask->src[i]; + msk->mac_src[i] |= match.mask->src[i]; + } } +} +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { struct flow_match_mpls match; - u32 t_mpls; + u32 key_mpls, msk_mpls; flow_rule_match_mpls(rule, &match); @@ -106,22 +132,24 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, return -EOPNOTSUPP; } - t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, - match.key->ls[0].mpls_label) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, - match.key->ls[0].mpls_tc) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, - match.key->ls[0].mpls_bos) | - NFP_FLOWER_MASK_MPLS_Q; - ext->mpls_lse = cpu_to_be32(t_mpls); - t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, - match.mask->ls[0].mpls_label) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, - match.mask->ls[0].mpls_tc) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, - match.mask->ls[0].mpls_bos) | - NFP_FLOWER_MASK_MPLS_Q; - msk->mpls_lse = cpu_to_be32(t_mpls); + key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.key->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.key->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.key->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.mask->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.mask->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.mask->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls)); + msk->mpls_lse |= cpu_to_be32(msk_mpls); } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q * bit, which indicates an mpls ether type but without any @@ -132,30 +160,41 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, flow_rule_match_basic(rule, &match); if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) { - ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); - msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); } } return 0; } -static void +static int +nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); + + nfp_flower_compile_mac(ext, msk, rule); + + return nfp_flower_compile_mpls(ext, msk, rule, extack); +} + +void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); - memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; flow_rule_match_ports(rule, &match); - ext->port_src = match.key->src; - ext->port_dst = match.key->dst; - msk->port_src = 
match.mask->src; - msk->port_dst = match.mask->dst; + ext->port_src |= match.key->src & match.mask->src; + ext->port_dst |= match.key->dst & match.mask->dst; + msk->port_src |= match.mask->src; + msk->port_dst |= match.mask->dst; } } @@ -167,18 +206,18 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, struct flow_match_basic match; flow_rule_match_basic(rule, &match); - ext->proto = match.key->ip_proto; - msk->proto = match.mask->ip_proto; + ext->proto |= match.key->ip_proto & match.mask->ip_proto; + msk->proto |= match.mask->ip_proto; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { struct flow_match_ip match; flow_rule_match_ip(rule, &match); - ext->tos = match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; + ext->tos |= match.key->tos & match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { @@ -231,99 +270,108 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, } static void -nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key, - struct nfp_flower_vlan *frame, - bool outer_vlan) +nfp_flower_fill_vlan(struct flow_match_vlan *match, + struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, bool outer_vlan) { - u16 tci; - - tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - key->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - key->vlan_id); + struct flow_dissector_key_vlan *mask = match->mask; + struct flow_dissector_key_vlan *key = match->key; + u16 msk_tci, key_tci; + + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + key->vlan_id); + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + mask->vlan_id); if (outer_vlan) { - frame->outer_tci = cpu_to_be16(tci); - frame->outer_tpid = key->vlan_tpid; + ext->outer_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->outer_tci |= cpu_to_be16(msk_tci); + msk->outer_tpid |= mask->vlan_tpid; } else { - frame->inner_tci = cpu_to_be16(tci); - frame->inner_tpid = key->vlan_tpid; + ext->inner_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->inner_tci |= cpu_to_be16(msk_tci); + msk->inner_tpid |= mask->vlan_tpid; } } -static void +void nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, struct nfp_flower_vlan *msk, struct flow_rule *rule) { struct flow_match_vlan match; - memset(ext, 0, sizeof(struct nfp_flower_vlan)); - memset(msk, 0, sizeof(struct nfp_flower_vlan)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { flow_rule_match_vlan(rule, &match); - nfp_flower_fill_vlan(match.key, ext, true); - nfp_flower_fill_vlan(match.mask, msk, true); + nfp_flower_fill_vlan(&match, ext, msk, true); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { flow_rule_match_cvlan(rule, &match); - nfp_flower_fill_vlan(match.key, ext, false); - nfp_flower_fill_vlan(match.mask, msk, false); + nfp_flower_fill_vlan(&match, ext, msk, false); } } -static void +void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_match_ipv4_addrs match; - - memset(ext, 0, sizeof(struct nfp_flower_ipv4)); - memset(msk, 0, sizeof(struct 
nfp_flower_ipv4)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); - ext->ipv4_src = match.key->src; - ext->ipv4_dst = match.key->dst; - msk->ipv4_src = match.mask->src; - msk->ipv4_dst = match.mask->dst; + ext->ipv4_src |= match.key->src & match.mask->src; + ext->ipv4_dst |= match.key->dst & match.mask->dst; + msk->ipv4_src |= match.mask->src; + msk->ipv4_dst |= match.mask->dst; } nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } -static void +void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; + int i; flow_rule_match_ipv6_addrs(rule, &match); - ext->ipv6_src = match.key->src; - ext->ipv6_dst = match.key->dst; - msk->ipv6_src = match.mask->src; - msk->ipv6_dst = match.mask->dst; + for (i = 0; i < sizeof(ext->ipv6_src); i++) { + ext->ipv6_src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->ipv6_dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } } nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } -static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; + int i; - flow_rule_match_enc_opts(rule, &match); - memcpy(ext, match.key->data, match.key->len); - memcpy(msk, match.mask->data, match.mask->len); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + flow_rule_match_enc_opts(rule, &match); - return 0; + for (i = 0; i < match.mask->len; i++) { + ext[i] |= match.key->data[i] & match.mask->data[i]; + msk[i] |= match.mask->data[i]; + } + } } static void @@ -335,10 +383,10 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); - ext->src = match.key->src; - ext->dst = match.key->dst; - msk->src = match.mask->src; - msk->dst = match.mask->dst; + ext->src |= match.key->src & match.mask->src; + ext->dst |= match.key->dst & match.mask->dst; + msk->src |= match.mask->src; + msk->dst |= match.mask->dst; } } @@ -349,12 +397,17 @@ nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; + int i; flow_rule_match_enc_ipv6_addrs(rule, &match); - ext->src = match.key->src; - ext->dst = match.key->dst; - msk->src = match.mask->src; - msk->dst = match.mask->dst; + for (i = 0; i < sizeof(ext->src); i++) { + ext->src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } } } @@ -367,10 +420,10 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct flow_match_ip match; flow_rule_match_enc_ip(rule, &match); - ext->tos = match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; + ext->tos |= match.key->tos & 
match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; } } @@ -383,10 +436,11 @@ nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, u32 vni; flow_rule_match_enc_keyid(rule, &match); - vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - *key = cpu_to_be32(vni); + vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) << + NFP_FL_TUN_VNI_OFFSET; + *key |= cpu_to_be32(vni); vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - *key_msk = cpu_to_be32(vni); + *key_msk |= cpu_to_be32(vni); } } @@ -398,22 +452,19 @@ nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - *key = match.key->keyid; - *key_msk = match.mask->keyid; + *key |= match.key->keyid & match.mask->keyid; + *key_msk |= match.mask->keyid; *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } } -static void +void nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, struct nfp_flower_ipv4_gre_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - /* NVGRE is the only supported GRE tunnel type */ ext->ethertype = cpu_to_be16(ETH_P_TEB); msk->ethertype = cpu_to_be16(~0); @@ -424,40 +475,31 @@ nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, &ext->tun_flags, &msk->tun_flags, rule); } -static void +void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); } -static void +void nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, struct nfp_flower_ipv6_udp_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); - nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); } -static void +void nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, struct nfp_flower_ipv6_gre_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); - /* NVGRE is the only supported GRE tunnel type */ ext->ethertype = cpu_to_be16(ETH_P_TEB); msk->ethertype = cpu_to_be16(~0); @@ -469,14 +511,13 @@ nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, } int nfp_flower_compile_flow_match(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_flower_priv *priv = app->priv; bool qinq_sup; u32 port_id; @@ -527,9 +568,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_in_port); if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { - err = 
nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, - (struct nfp_flower_mac_mpls *)msk, - rule, extack); + err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext, + (struct nfp_flower_mac_mpls *)msk, + rule, extack); if (err) return err; @@ -640,9 +681,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, rule); - if (err) - return err; + nfp_flower_compile_geneve_opt(ext, msk, rule); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 621113650a9b..2af9faee96c5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -290,8 +290,7 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, return true; } -int nfp_compile_flow_metadata(struct nfp_app *app, - struct flow_cls_offload *flow, +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, struct nfp_fl_payload *nfp_flow, struct net_device *netdev, struct netlink_ext_ack *extack) @@ -310,7 +309,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app, } nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); - nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie); + nfp_flow->meta.host_cookie = cpu_to_be64(cookie); nfp_flow->ingress_dev = netdev; ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL); @@ -357,7 +356,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app, priv->stats[stats_cxt].bytes = 0; priv->stats[stats_cxt].used = jiffies; - check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev); + check_entry = nfp_flower_search_fl_table(app, cookie, netdev); if (check_entry) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); if (nfp_release_stats_entry(app, stats_cxt)) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 2406d33356ad..556c3495211d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -41,6 +41,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ + BIT(FLOW_DISSECTOR_KEY_CT) | \ + BIT(FLOW_DISSECTOR_KEY_META) | \ BIT(FLOW_DISSECTOR_KEY_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ @@ -89,7 +91,7 @@ struct nfp_flower_merge_check { }; }; -static int +int nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, u8 mtype) { @@ -134,20 +136,16 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, return 0; } -static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f) +static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); - return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } -static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) +static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); - return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } @@ -236,15 +234,14 @@ nfp_flower_calc_udp_tun_layer(struct 
flow_dissector_key_ports *enc_ports, return 0; } -static int +int nfp_flower_calculate_key_layers(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_key_ls *ret_key_ls, - struct flow_cls_offload *flow, + struct flow_rule *rule, enum nfp_flower_tun_type *tun_type, struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; struct flow_match_basic basic = { NULL, NULL}; struct nfp_flower_priv *priv = app->priv; @@ -452,7 +449,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported"); return -EOPNOTSUPP; } - } else if (nfp_flower_check_higher_than_mac(flow)) { + } else if (nfp_flower_check_higher_than_mac(rule)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType"); return -EOPNOTSUPP; } @@ -471,7 +468,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } if (!(key_layer & NFP_FLOWER_LAYER_TP) && - nfp_flower_check_higher_than_l3(flow)) { + nfp_flower_check_higher_than_l3(rule)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type"); return -EOPNOTSUPP; } @@ -543,7 +540,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, return 0; } -static struct nfp_fl_payload * +struct nfp_fl_payload * nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) { struct nfp_fl_payload *flow_pay; @@ -1005,9 +1002,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2) { - struct flow_cls_offload merge_tc_off; struct nfp_flower_priv *priv = app->priv; - struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; struct nfp_merge_info *merge_info; @@ -1016,7 +1011,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, ASSERT_RTNL(); - extack = merge_tc_off.common.extack; if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -1061,9 +1055,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, if (err) goto err_unlink_sub_flow1; - merge_tc_off.cookie = merge_flow->tc_flower_cookie; - err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow, - merge_flow->ingress_dev, extack); + err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow, + merge_flow->ingress_dev, NULL); if (err) goto err_unlink_sub_flow2; @@ -1305,6 +1298,7 @@ static int nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flow) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; struct nfp_flower_priv *priv = app->priv; struct netlink_ext_ack *extack = NULL; @@ -1330,7 +1324,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (!key_layer) return -ENOMEM; - err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow, + err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule, &tun_type, extack); if (err) goto err_free_key_ls; @@ -1341,12 +1335,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_free_key_ls; } - err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev, + err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev, flow_pay, tun_type, extack); if (err) goto err_destroy_flow; - err = 
nfp_flower_compile_action(app, flow, netdev, flow_pay, extack); + err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack); if (err) goto err_destroy_flow; @@ -1356,7 +1350,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_destroy_flow; } - err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack); + err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack); if (err) goto err_destroy_flow; @@ -1476,7 +1470,7 @@ err_free_links: kfree_rcu(merge_flow, rcu); } -static void +void nfp_flower_del_linked_merge_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow) { @@ -1601,7 +1595,7 @@ __nfp_flower_update_merge_stats(struct nfp_app *app, } } -static void +void nfp_flower_update_merge_stats(struct nfp_app *app, struct nfp_fl_payload *sub_flow) { @@ -1628,10 +1622,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; u32 ctx_id; + /* Check ct_map table first */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + extack = flow->common.extack; nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); if (!nfp_flow) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df5b748be068..df203738511b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -17,6 +17,7 @@ #include <linux/list.h> #include <linux/netdevice.h> #include <linux/pci.h> +#include <linux/dim.h> #include <linux/io-64-nonatomic-hi-lo.h> #include <linux/semaphore.h> #include <linux/workqueue.h> @@ -360,6 +361,9 @@ struct nfp_net_rx_ring { * @rx_ring: Pointer to RX ring * @xdp_ring: Pointer to an extra TX ring for XDP * @irq_entry: MSI-X table entry (use for talking to the device) + * @event_ctr: Number of interrupt + * @rx_dim: Dynamic interrupt moderation structure for RX + * @tx_dim: Dynamic interrupt moderation structure for TX * @rx_sync: Seqlock for atomic updates of RX stats * @rx_pkts: Number of received packets * @rx_bytes: Number of received bytes @@ -410,6 +414,10 @@ struct nfp_net_r_vector { u16 irq_entry; + u16 event_ctr; + struct dim rx_dim; + struct dim tx_dim; + struct u64_stats_sync rx_sync; u64 rx_pkts; u64 rx_bytes; @@ -571,6 +579,8 @@ struct nfp_net_dp { * mailbox area, crypto TLV * @link_up: Is the link up? * @link_status_lock: Protects @link_* and ensures atomicity with BAR reading + * @rx_coalesce_adapt_on: Is RX interrupt moderation adaptive? + * @tx_coalesce_adapt_on: Is TX interrupt moderation adaptive? 
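The stats path above now consults the conntrack map table first, so a TC cookie owned by the flower conntrack code is answered by nfp_fl_ct_stats() and never touches the ordinary flow table. That lookup-first dispatch reduces to an rhashtable probe along these lines (hypothetical types and params; the driver's real ones are nfp_fl_ct_map_entry and nfp_ct_map_params):

#include <linux/rhashtable.h>

struct demo_ct_entry {
	unsigned long cookie;
	struct rhash_head node;
};

static const struct rhashtable_params demo_ct_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct demo_ct_entry, cookie),
	.head_offset	= offsetof(struct demo_ct_entry, node),
};

/* NULL means "not a conntrack-owned flow, fall back to the flow table" */
static struct demo_ct_entry *demo_ct_lookup(struct rhashtable *ct_map,
					    unsigned long cookie)
{
	return rhashtable_lookup_fast(ct_map, &cookie, demo_ct_params);
}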
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter @@ -654,6 +664,8 @@ struct nfp_net { struct semaphore bar_lock; + bool rx_coalesce_adapt_on; + bool tx_coalesce_adapt_on; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; u32 tx_coalesce_usecs; @@ -919,6 +931,14 @@ static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev) return netdev->netdev_ops == &nfp_net_netdev_ops; } +static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts) +{ + if ((usecs >= ((1 << 16) - 1)) || (pkts >= ((1 << 16) - 1))) + return -EINVAL; + + return 0; +} + /* Prototypes */ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, void __iomem *ctrl_bar); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 5dfa4799c34f..5bfa22accf2c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -474,6 +474,12 @@ static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) { struct nfp_net_r_vector *r_vec = data; + /* Currently we cannot tell if it's a rx or tx interrupt, + * since dim does not need accurate event_ctr to calculate, + * we just use this counter for both rx and tx dim. + */ + r_vec->event_ctr++; + napi_schedule_irqoff(&r_vec->napi); /* The FW auto-masks any interrupt, either via the MASK bit in @@ -1697,7 +1703,7 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, case NFP_NET_META_RESYNC_INFO: if (nfp_net_tls_rx_resync_req(netdev, data, pkt, pkt_len)) - return NULL; + return false; data += sizeof(struct nfp_net_tls_resync_req); break; default: @@ -2061,6 +2067,36 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) if (napi_complete_done(napi, pkts_polled)) nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (r_vec->nfp_net->rx_coalesce_adapt_on) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + return pkts_polled; } @@ -2873,6 +2909,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn) */ static void nfp_net_close_stack(struct nfp_net *nn) { + struct nfp_net_r_vector *r_vec; unsigned int r; disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); @@ -2880,8 +2917,16 @@ static void nfp_net_close_stack(struct nfp_net *nn) nn->link_up = false; for (r = 0; r < nn->dp.num_r_vecs; r++) { - disable_irq(nn->r_vecs[r].irq_vector); - napi_disable(&nn->r_vecs[r].napi); + r_vec = &nn->r_vecs[r]; + + disable_irq(r_vec->irq_vector); + napi_disable(&r_vec->napi); + + if (r_vec->rx_ring) + cancel_work_sync(&r_vec->rx_dim.work); + + if (r_vec->tx_ring) + cancel_work_sync(&r_vec->tx_dim.work); } netif_tx_disable(nn->dp.netdev); @@ -2948,17 
+2993,92 @@ void nfp_ctrl_close(struct nfp_net *nn) rtnl_unlock(); } +static void nfp_net_rx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy RX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + rtnl_lock(); + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + rtnl_unlock(); + + dim->state = DIM_START_MEASURE; +} + +static void nfp_net_tx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy TX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + rtnl_lock(); + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + rtnl_unlock(); + + dim->state = DIM_START_MEASURE; +} + /** * nfp_net_open_stack() - Start the device from stack's perspective * @nn: NFP Net device to reconfigure */ static void nfp_net_open_stack(struct nfp_net *nn) { + struct nfp_net_r_vector *r_vec; unsigned int r; for (r = 0; r < nn->dp.num_r_vecs; r++) { - napi_enable(&nn->r_vecs[r].napi); - enable_irq(nn->r_vecs[r].irq_vector); + r_vec = &nn->r_vecs[r]; + + if (r_vec->rx_ring) { + INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work); + r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + if (r_vec->tx_ring) { + INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work); + r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + napi_enable(&r_vec->napi); + enable_irq(r_vec->irq_vector); } netif_tx_wake_all_queues(nn->dp.netdev); @@ -3161,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp) for (r = 0; r < nn->max_r_vecs; r++) nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r); - err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings); + err = netif_set_real_num_queues(nn->dp.netdev, + nn->dp.num_stack_tx_rings, + nn->dp.num_rx_rings); if (err) return err; - if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) { - err = netif_set_real_num_tx_queues(nn->dp.netdev, - nn->dp.num_stack_tx_rings); - if (err) - return err; - } - return nfp_net_set_config_and_enable(nn); } @@ -3893,6 +4008,9 @@ static void nfp_net_irqmod_init(struct nfp_net *nn) nn->rx_coalesce_max_frames = 64; nn->tx_coalesce_usecs = 50; nn->tx_coalesce_max_frames = 
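Taken together, the pieces above form the usual net_dim wiring: the interrupt handler bumps event_ctr, the NAPI poll feeds a packet/byte sample to the dim library, and the dim work callback applies whichever moderation profile the library selected, packed as (pkts << 16) | usec_ticks, which is why nfp_net_coalesce_para_check() insists both halves fit 16 bits. A condensed sketch of that flow, with hypothetical my_* names:

#include <linux/dim.h>
#include <linux/workqueue.h>

struct my_vec {
	struct dim rx_dim;
	u16 event_ctr;
	u64 rx_pkts;
	u64 rx_bytes;
};

/* dim work callback: apply the profile the library picked */
static void my_rx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct dim_cq_moder moder;

	moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
	/* ...write (moder.pkts << 16) | converted-usecs to the NIC... */
	dim->state = DIM_START_MEASURE;
}

/* end of NAPI poll: hand dim a fresh sample */
static void my_poll_tail(struct my_vec *v)
{
	struct dim_sample sample = {};

	dim_update_sample(v->event_ctr, v->rx_pkts, v->rx_bytes, &sample);
	net_dim(&v->rx_dim, sample);	/* may schedule my_rx_dim_work() */
}

/* open path: arm the work item before enabling NAPI */
static void my_open(struct my_vec *v)
{
	INIT_WORK(&v->rx_dim.work, my_rx_dim_work);
	v->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}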
64; + + nn->rx_coalesce_adapt_on = true; + nn->tx_coalesce_adapt_on = true; } static void nfp_net_netdev_init(struct nfp_net *nn) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 8803faadd302..0bf2ff5717bc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1085,6 +1085,9 @@ static int nfp_net_get_coalesce(struct net_device *netdev, if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) return -EINVAL; + ec->use_adaptive_rx_coalesce = nn->rx_coalesce_adapt_on; + ec->use_adaptive_tx_coalesce = nn->tx_coalesce_adapt_on; + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; @@ -1361,19 +1364,18 @@ static int nfp_net_set_coalesce(struct net_device *netdev, if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) return -EINVAL; - if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1)) - return -EINVAL; - - if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1)) + if (nfp_net_coalesce_para_check(ec->rx_coalesce_usecs * factor, + ec->rx_max_coalesced_frames)) return -EINVAL; - if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1)) - return -EINVAL; - - if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1)) + if (nfp_net_coalesce_para_check(ec->tx_coalesce_usecs * factor, + ec->tx_max_coalesced_frames)) return -EINVAL; /* configuration is valid */ + nn->rx_coalesce_adapt_on = !!ec->use_adaptive_rx_coalesce; + nn->tx_coalesce_adapt_on = !!ec->use_adaptive_tx_coalesce; + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; @@ -1445,7 +1447,8 @@ static int nfp_net_set_channels(struct net_device *netdev, static const struct ethtool_ops nfp_net_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES, + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = nfp_net_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = nfp_net_get_ringparam, diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 64c6842bd452..d29fe562b3de 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1219,7 +1219,7 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_stop = lpc_eth_close, .ndo_start_xmit = lpc_eth_hard_start_xmit, .ndo_set_rx_mode = lpc_eth_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = lpc_set_mac_address, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index e351f3d1608f..bc35d5703bd2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2333,7 +2333,7 @@ static const struct net_device_ops pch_gbe_netdev_ops = { .ndo_tx_timeout = pch_gbe_tx_timeout, .ndo_change_mtu = pch_gbe_change_mtu, .ndo_set_features = pch_gbe_set_features, - .ndo_do_ioctl = pch_gbe_ioctl, + .ndo_eth_ioctl = pch_gbe_ioctl, .ndo_set_rx_mode = pch_gbe_set_multi, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = pch_gbe_netpoll, diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index d058a63602a9..1a6336a56d3d 100644 --- 
a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -546,7 +546,9 @@ static int read_eeprom(void __iomem *ioaddr, int location); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int hamachi_open(struct net_device *dev); -static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); static void hamachi_timer(struct timer_list *t); static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue); static void hamachi_init_ring(struct net_device *dev); @@ -571,7 +573,8 @@ static const struct net_device_ops hamachi_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_tx_timeout = hamachi_tx_timeout, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = hamachi_ioctl, + .ndo_siocdevprivate = hamachi_siocdevprivate, }; @@ -1867,7 +1870,36 @@ static const struct ethtool_ops ethtool_ops_no_mii = { .get_drvinfo = hamachi_get_drvinfo, }; -static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +/* private ioctl: set rx,tx intr params */ +static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) +{ + struct hamachi_private *np = netdev_priv(dev); + u32 *d = (u32 *)&rq->ifr_ifru; + + if (!netif_running(dev)) + return -EINVAL; + + if (cmd != SIOCDEVPRIVATE + 3) + return -EOPNOTSUPP; + + /* Should add this check here or an ordinary user can do nasty + * things. -KDU + * + * TODO: Shut down the Rx and Tx engines while doing this. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + writel(d[0], np->base + TxIntrCtrl); + writel(d[1], np->base + RxIntrCtrl); + printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name, + (u32)readl(np->base + TxIntrCtrl), + (u32)readl(np->base + RxIntrCtrl)); + + return 0; +} + +static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct hamachi_private *np = netdev_priv(dev); struct mii_ioctl_data *data = if_mii(rq); @@ -1876,28 +1908,9 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!netif_running(dev)) return -EINVAL; - if (cmd == (SIOCDEVPRIVATE+3)) { /* set rx,tx intr params */ - u32 *d = (u32 *)&rq->ifr_ifru; - /* Should add this check here or an ordinary user can do nasty - * things. -KDU - * - * TODO: Shut down the Rx and Tx engines while doing this. 
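The hamachi change here shows the general shape of the ndo_do_ioctl split applied throughout this series: MII-style ioctls move to .ndo_eth_ioctl (which the core routes to, broadly, the MII and hardware-timestamping commands), while SIOCDEVPRIVATE commands get their own .ndo_siocdevprivate entry point that receives the user pointer explicitly. A stripped-down sketch with hypothetical my_* handlers:

#include <linux/capability.h>
#include <linux/netdevice.h>
#include <linux/sockios.h>

static int my_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	/* generic_mii_ioctl() style handling would live here */
	return -EOPNOTSUPP;
}

static int my_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	if (cmd != SIOCDEVPRIVATE + 3)
		return -EOPNOTSUPP;
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	/* device-private register pokes would live here */
	return 0;
}

static const struct net_device_ops my_netdev_ops = {
	.ndo_eth_ioctl		= my_eth_ioctl,
	.ndo_siocdevprivate	= my_siocdevprivate,
};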
- */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - writel(d[0], np->base + TxIntrCtrl); - writel(d[1], np->base + RxIntrCtrl); - printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name, - (u32) readl(np->base + TxIntrCtrl), - (u32) readl(np->base + RxIntrCtrl)); - rc = 0; - } - - else { - spin_lock_irq(&np->lock); - rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL); - spin_unlock_irq(&np->lock); - } + spin_lock_irq(&np->lock); + rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL); + spin_unlock_irq(&np->lock); return rc; } diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index d1dd9bc1bc7f..f5cd8f51be7c 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -362,7 +362,7 @@ static const struct net_device_ops netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = yellowfin_tx_timeout, }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index e4a5416adc80..7e296fa71b36 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -165,10 +165,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) goto out; } + ionic->num_vfs++; /* ignore failures from older FW, we just won't get stats */ (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR, (u8 *)&v->stats_pa); - ionic->num_vfs++; } out: @@ -373,9 +373,6 @@ static void ionic_remove(struct pci_dev *pdev) { struct ionic *ionic = pci_get_drvdata(pdev); - if (!ionic) - return; - del_timer_sync(&ionic->watchdog_timer); if (ionic->lif) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 1dfe962e22e0..9aac647290f7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -106,6 +106,8 @@ int ionic_dev_setup(struct ionic *ionic) idev->last_fw_hb = 0; idev->fw_hb_ready = true; idev->fw_status_ready = true; + idev->fw_generation = IONIC_FW_STS_F_GENERATION & + ioread8(&idev->dev_info_regs->fw_status); mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); @@ -121,7 +123,9 @@ int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; unsigned long check_time, last_check_time; - bool fw_status_ready, fw_hb_ready; + bool fw_status_ready = true; + bool fw_hb_ready; + u8 fw_generation; u8 fw_status; u32 fw_hb; @@ -140,9 +144,29 @@ do_check_time: /* firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) + * If fw_status is not ready don't bother with the generation. */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); + + if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) { + fw_status_ready = false; + } else { + fw_generation = fw_status & IONIC_FW_STS_F_GENERATION; + if (idev->fw_generation != fw_generation) { + dev_info(ionic->dev, "FW generation 0x%02x -> 0x%02x\n", + idev->fw_generation, fw_generation); + + idev->fw_generation = fw_generation; + + /* If the generation changed, the fw status is not + * ready so we need to trigger a fw-down cycle. 
After + * the down, the next watchdog will see the fw is up + * and the generation value stable, so will trigger + * the fw-up activity. + */ + fw_status_ready = false; + } + } /* is this a transition? */ if (fw_status_ready != idev->fw_status_ready) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index c25cf9b744c5..8311086fb1f4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -143,6 +143,7 @@ struct ionic_dev { u32 last_fw_hb; bool fw_hb_ready; bool fw_status_ready; + u8 fw_generation; u64 __iomem *db_pages; dma_addr_t phy_db_pages; @@ -160,8 +161,6 @@ struct ionic_dev { struct ionic_cq_info { union { void *cq_desc; - struct ionic_txq_comp *txcq; - struct ionic_rxq_comp *rxcq; struct ionic_admin_comp *admincq; struct ionic_notifyq_event *notifyq; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index b41301a5b0df..cd520e4c5522 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -91,20 +91,20 @@ int ionic_devlink_register(struct ionic *ionic) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; devlink_port_attrs_set(&ionic->dl_port, &attrs); err = devlink_port_register(dl, &ionic->dl_port, 0); - if (err) + if (err) { dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); - else - devlink_port_type_eth_set(&ionic->dl_port, - ionic->lif->netdev); + devlink_unregister(dl); + return err; + } - return err; + devlink_port_type_eth_set(&ionic->dl_port, ionic->lif->netdev); + return 0; } void ionic_devlink_unregister(struct ionic *ionic) { struct devlink *dl = priv_to_devlink(ionic); - if (ionic->dl_port.registered) - devlink_port_unregister(&ionic->dl_port); + devlink_port_unregister(&ionic->dl_port); devlink_unregister(dl); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 6583be570e45..adc9fdb03e86 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -32,6 +32,9 @@ static void ionic_get_stats(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); u32 i; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return; + memset(buf, 0, stats->n_stats * sizeof(*buf)); for (i = 0; i < ionic_num_stats_grps; i++) ionic_stats_groups[i].get_values(lif, &buf); @@ -274,6 +277,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev, struct ionic *ionic = lif->ionic; int err = 0; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + /* set autoneg */ if (ks->base.autoneg != idev->port_info->config.an_enable) { mutex_lock(&ionic->dev_cmd_lock); @@ -320,6 +326,9 @@ static int ionic_set_pauseparam(struct net_device *netdev, u32 requested_pause; int err; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + if (pause->autoneg) return -EOPNOTSUPP; @@ -372,6 +381,9 @@ static int ionic_set_fecparam(struct net_device *netdev, u8 fec_type; int ret = 0; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + if (lif->ionic->idev.port_info->config.an_enable) { netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n"); return -EINVAL; @@ -528,6 +540,9 @@ static int ionic_set_ringparam(struct net_device *netdev, struct ionic_queue_params qparam; int err; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return 
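The heartbeat rework above relies on the firmware publishing a 4-bit generation number in fw_status (bits 4-7, masked by IONIC_FW_STS_F_GENERATION below), so a crash-and-restart that completes between two watchdog polls is still noticed even though the RUNNING bit never appears to drop. The decision distills to something like this (hypothetical helper, not driver code):

#include <stdbool.h>
#include <stdint.h>

#define FW_STS_F_RUNNING	0x01
#define FW_STS_F_GENERATION	0xF0

/* Returns false when a fw-down/fw-up cycle should be forced. */
static bool fw_same_instance(uint8_t fw_status, uint8_t *cached_gen)
{
	uint8_t gen;

	if (fw_status == 0xff || !(fw_status & FW_STS_F_RUNNING))
		return false;	/* bad PCI read, or fw not running */

	gen = fw_status & FW_STS_F_GENERATION;
	if (gen != *cached_gen) {
		*cached_gen = gen;
		return false;	/* fw restarted behind our back */
	}

	return true;
}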
-EBUSY; + ionic_init_queue_params(lif, &qparam); if (ring->rx_mini_pending || ring->rx_jumbo_pending) { @@ -597,6 +612,9 @@ static int ionic_set_channels(struct net_device *netdev, int max_cnt; int err; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + ionic_init_queue_params(lif, &qparam); if (ch->rx_count != ch->tx_count) { @@ -947,6 +965,9 @@ static int ionic_nway_reset(struct net_device *netdev) struct ionic *ionic = lif->ionic; int err = 0; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + /* flap the link to force auto-negotiation */ mutex_lock(&ionic->dev_cmd_lock); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 0478b48d9895..278610ed7227 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -2936,6 +2936,8 @@ struct ionic_hwstamp_regs { * @asic_type: Asic type * @asic_rev: Asic revision * @fw_status: Firmware status + * bit 0 - 1 = fw running + * bit 4-7 - 4 bit generation number, changes on fw restart * @fw_heartbeat: Firmware heartbeat counter * @serial_num: Serial number * @fw_version: Firmware version @@ -2949,7 +2951,8 @@ union ionic_dev_info_regs { u8 version; u8 asic_type; u8 asic_rev; -#define IONIC_FW_STS_F_RUNNING 0x1 +#define IONIC_FW_STS_F_RUNNING 0x01 +#define IONIC_FW_STS_F_GENERATION 0xF0 u8 fw_status; u32 fw_heartbeat; char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index e795fa63ca12..f52c47a71f4b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include "ionic.h" #include "ionic_bus.h" @@ -1599,7 +1600,6 @@ static int ionic_init_nic_features(struct ionic_lif *lif) features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM | @@ -1607,6 +1607,9 @@ static int ionic_init_nic_features(struct ionic_lif *lif) NETIF_F_TSO6 | NETIF_F_TSO_ECN; + if (lif->nxqs > 1) + features |= NETIF_F_RXHASH; + err = ionic_set_nic_features(lif, features); if (err) return err; @@ -2257,7 +2260,7 @@ static int ionic_stop(struct net_device *netdev) return 0; } -static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct ionic_lif *lif = netdev_priv(netdev); @@ -2519,7 +2522,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, - .ndo_do_ioctl = ionic_do_ioctl, + .ndo_eth_ioctl = ionic_eth_ioctl, .ndo_start_xmit = ionic_start_xmit, .ndo_get_stats64 = ionic_get_stats64, .ndo_set_rx_mode = ionic_ndo_set_rx_mode, @@ -2580,22 +2583,26 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, struct ionic_qcq **tx_qcqs = NULL; struct ionic_qcq **rx_qcqs = NULL; unsigned int flags, i; - int err = -ENOMEM; + int err = 0; /* allocate temporary qcq arrays to hold new queue structs */ if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) { tx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->ntxqs_per_lif, sizeof(struct ionic_qcq *), GFP_KERNEL); - if (!tx_qcqs) + if (!tx_qcqs) { + err = 
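The -EBUSY guards added to the ionic ethtool handlers above all share one shape: bail out of configuration paths while the lif's FW_RESET state bit says a firmware reset is in flight, rather than racing the queue teardown. Roughly, with hypothetical names:

#include <linux/bitops.h>
#include <linux/errno.h>

enum { MY_LIF_F_FW_RESET };

static int my_set_ringparam(unsigned long *lif_state)
{
	if (test_bit(MY_LIF_F_FW_RESET, lif_state))
		return -EBUSY;	/* retry once the reset completes */

	/* ...validate and apply the new ring sizes... */
	return 0;
}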
-ENOMEM; goto err_out; + } } if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs || qparam->rxq_features != lif->rxq_features) { rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif, sizeof(struct ionic_qcq *), GFP_KERNEL); - if (!rx_qcqs) + if (!rx_qcqs) { + err = -ENOMEM; goto err_out; + } } /* allocate new desc_info and rings, but leave the interrupt setup @@ -2774,6 +2781,9 @@ err_out: ionic_qcq_free(lif, lif->rxqcqs[i]); } + if (err) + netdev_info(lif->netdev, "%s: failed %d\n", __func__, err); + return err; } @@ -2827,8 +2837,14 @@ int ionic_lif_alloc(struct ionic *ionic) lif->ionic = ionic; lif->index = 0; - lif->ntxq_descs = IONIC_DEF_TXRX_DESC; - lif->nrxq_descs = IONIC_DEF_TXRX_DESC; + + if (is_kdump_kernel()) { + lif->ntxq_descs = IONIC_MIN_TXRX_DESC; + lif->nrxq_descs = IONIC_MIN_TXRX_DESC; + } else { + lif->ntxq_descs = IONIC_DEF_TXRX_DESC; + lif->nrxq_descs = IONIC_DEF_TXRX_DESC; + } /* Convert the default coalesce value to actual hw resolution */ lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT; @@ -3514,6 +3530,7 @@ int ionic_lif_size(struct ionic *ionic) unsigned int min_intrs; int err; + /* retrieve basic values from FW */ lc = &ident->lif.eth.config; dev_nintrs = le32_to_cpu(ident->dev.nintrs); neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count); @@ -3521,6 +3538,15 @@ int ionic_lif_size(struct ionic *ionic) ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]); nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]); + /* limit values to play nice with kdump */ + if (is_kdump_kernel()) { + dev_nintrs = 2; + neqs_per_lif = 0; + nnqs_per_lif = 0; + ntxqs_per_lif = 1; + nrxqs_per_lif = 1; + } + /* reserve last queue id for hardware timestamping */ if (lc->features & cpu_to_le64(IONIC_ETH_HW_TIMESTAMP)) { if (ntxqs_per_lif <= 1 || nrxqs_per_lif <= 1) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 61cfe2120817..5f1e5b6e85c3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -450,6 +450,8 @@ int ionic_identify(struct ionic *ionic) } mutex_unlock(&ionic->dev_cmd_lock); + dev_info(ionic->dev, "FW: %s\n", idev->dev_info.fw_version); + if (err) { dev_err(ionic->dev, "Cannot identify ionic: %dn", err); goto err_out; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index 6e2403c71608..afc45da399d4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -119,8 +119,8 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, config->rx_filter = HWTSTAMP_FILTER_ALL; } - dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n", - config->rx_filter, rx_filt, rx_all); + dev_dbg(ionic->dev, "%s: config_rx_filter %d rx_filt %#llx rx_all %d\n", + __func__, config->rx_filter, rx_filt, rx_all); if (tx_mode) { err = ionic_lif_create_hwstamp_txq(lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 08870190e4d2..37c39581b659 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -32,19 +32,13 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) return netdev_get_tx_queue(q->lif->netdev, q->index); } -static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info) -{ - buf_info->page = NULL; - 
buf_info->page_offset = 0; - buf_info->dma_addr = 0; -} - static int ionic_rx_page_alloc(struct ionic_queue *q, struct ionic_buf_info *buf_info) { struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; struct device *dev; + struct page *page; dev = q->dev; stats = q_to_rx_stats(q); @@ -55,26 +49,27 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, return -EINVAL; } - buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); - if (unlikely(!buf_info->page)) { + page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); + if (unlikely(!page)) { net_err_ratelimited("%s: %s page alloc failed\n", netdev->name, q->name); stats->alloc_err++; return -ENOMEM; } - buf_info->page_offset = 0; - buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset, + buf_info->dma_addr = dma_map_page(dev, page, 0, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { - __free_pages(buf_info->page, 0); - ionic_rx_buf_reset(buf_info); + __free_pages(page, 0); net_err_ratelimited("%s: %s dma map failed\n", netdev->name, q->name); stats->dma_map_err++; return -EIO; } + buf_info->page = page; + buf_info->page_offset = 0; + return 0; } @@ -95,7 +90,7 @@ static void ionic_rx_page_free(struct ionic_queue *q, dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); __free_pages(buf_info->page, 0); - ionic_rx_buf_reset(buf_info); + buf_info->page = NULL; } static bool ionic_rx_buf_recycle(struct ionic_queue *q, @@ -139,7 +134,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len); - prefetch(buf_info->page); + prefetchw(buf_info->page); skb = napi_get_frags(&q_to_qcq(q)->napi); if (unlikely(!skb)) { @@ -170,7 +165,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) { dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - ionic_rx_buf_reset(buf_info); + buf_info->page = NULL; } buf_info++; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index b590c70539b5..d58e021614cd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -26,15 +26,6 @@ extern const struct qed_common_ops qed_common_ops_pass; -#define QED_MAJOR_VERSION 8 -#define QED_MINOR_VERSION 37 -#define QED_REVISION_VERSION 0 -#define QED_ENGINEERING_VERSION 20 - -#define QED_VERSION \ - ((QED_MAJOR_VERSION << 24) | (QED_MINOR_VERSION << 16) | \ - (QED_REVISION_VERSION << 8) | QED_ENGINEERING_VERSION) - #define STORM_FW_VERSION \ ((FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | \ (FW_REVISION_VERSION << 8) | FW_ENGINEERING_VERSION) @@ -517,12 +508,6 @@ enum qed_hsi_def_type { QED_NUM_HSI_DEFS }; -#define DRV_MODULE_VERSION \ - __stringify(QED_MAJOR_VERSION) "." \ - __stringify(QED_MINOR_VERSION) "." \ - __stringify(QED_REVISION_VERSION) "." 
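The rx-buffer rework above follows a publish-last pattern: page allocation and DMA mapping happen in locals, and buf_info is written only once nothing can fail, which is what lets ionic_rx_buf_reset() disappear and the error/free paths get by with a bare "buf_info->page = NULL". A minimal sketch of the pattern under the same assumptions (hypothetical demo_buf type):

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>

struct demo_buf {
	struct page *page;
	unsigned int page_offset;
	dma_addr_t dma_addr;
};

static int demo_buf_fill(struct device *dev, struct demo_buf *buf)
{
	struct page *page;
	dma_addr_t addr;

	page = alloc_pages(GFP_ATOMIC, 0);
	if (!page)
		return -ENOMEM;

	addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, addr)) {
		__free_pages(page, 0);
		return -EIO;	/* buf is left untouched on failure */
	}

	/* publish only after every step has succeeded */
	buf->page = page;
	buf->page_offset = 0;
	buf->dma_addr = addr;

	return 0;
}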
\ - __stringify(QED_ENGINEERING_VERSION) - struct qed_simd_fp_handler { void *token; void (*func)(void *); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index e81dd34a3cac..dc93ddea8906 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -741,7 +741,6 @@ static int qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_dcbx_mib_meta_data data; - int rc = 0; memset(&data, 0, sizeof(data)); data.addr = p_hwfn->mcp_info->port_addr + offsetof(struct public_port, @@ -750,7 +749,7 @@ qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) data.size = sizeof(struct lldp_config_params_s); qed_memcpy_from(p_hwfn, p_ptt, data.lldp_local, data.addr, data.size); - return rc; + return 0; } static int @@ -810,7 +809,6 @@ static int qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_dcbx_mib_meta_data data; - int rc = 0; memset(&data, 0, sizeof(data)); data.addr = p_hwfn->mcp_info->port_addr + @@ -819,7 +817,7 @@ qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) data.size = sizeof(struct dcbx_local_params); qed_memcpy_from(p_hwfn, p_ptt, data.local_admin, data.addr, data.size); - return rc; + return 0; } static int qed_dcbx_read_mib(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 578935f643b8..ab6d4f737316 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -464,12 +464,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); + if (int_sts == 0xdeadbeaf) { + DP_NOTICE(p_hwfn->cdev, + "DORQ is being reset, skipping int_sts handler\n"); + + return 0; + } + /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. 
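Moving the DORQ_REG_INT_STS read to the top of the handler, as above, also lets it recognize the poison pattern (0xdeadbeaf) that reads return while the block is held in reset, and skip processing instead of mis-parsing garbage. The guard in isolation (hypothetical helper around a raw status value):

#include <linux/types.h>

#define DEMO_RESET_POISON	0xdeadbeaf

static int demo_handle_int_sts(u32 int_sts)
{
	if (int_sts == DEMO_RESET_POISON)
		return 0;	/* block under reset: registers not valid */

	/* ...decode drop reasons and details as usual... */
	return 0;
}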
*/ - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; @@ -528,6 +535,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) { + if (p_hwfn->cdev->recov_in_prog) + return 0; + p_hwfn->db_recovery_info.dorq_attn = true; qed_dorq_attn_overflow(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index a99861124630..fc8b3e64f153 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn, static const u32 ip_zero[4] = { 0, 0, 0, 0 }; bool found = false; - qed_iwarp_print_cm_info(p_hwfn, cm_info); - list_for_each_entry(listener, &p_hwfn->p_rdma_info->iwarp.listen_list, list_entry) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 5bd58c65e163..6871d892eabf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -49,11 +49,10 @@ #define QED_NVM_CFG_MAX_ATTRS 50 static char version[] = - "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; + "QLogic FastLinQ 4xxxx Core Module qed\n"; MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Core Module"); MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_MODULE_VERSION); #define FW_FILE_VERSION \ __stringify(FW_MAJOR_VERSION) "." \ @@ -1216,6 +1215,10 @@ static void qed_slowpath_task(struct work_struct *work) if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC, &hwfn->slowpath_task_flags)) { + /* skip qed_db_rec_handler during recovery/unload */ + if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active) + goto out; + qed_db_rec_handler(hwfn, ptt); if (hwfn->periodic_db_rec_count--) qed_slowpath_delayed_work(hwfn, @@ -1223,6 +1226,7 @@ static void qed_slowpath_task(struct work_struct *work) QED_PERIODIC_DB_REC_INTERVAL); } +out: qed_ptt_release(hwfn, ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 4387292c37e2..6e5a6cc97d0e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -944,7 +944,6 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, memset(&in_params, 0, sizeof(in_params)); in_params.hsi_ver = QED_LOAD_REQ_HSI_VER_DEFAULT; - in_params.drv_ver_0 = QED_VERSION; in_params.drv_ver_1 = qed_get_config_bitmap(); in_params.fw_ver = STORM_FW_VERSION; rc = eocre_get_mfw_drv_role(p_hwfn, p_params->drv_role, &mfw_drv_role); diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c index c1dd71d19f3f..3b84d00cf987 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c @@ -4,7 +4,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/types.h> diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 5630008f38b7..66c69f0f9af1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -30,15 +30,6 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> -#define QEDE_MAJOR_VERSION 8 -#define QEDE_MINOR_VERSION 37 -#define QEDE_REVISION_VERSION 0 -#define QEDE_ENGINEERING_VERSION 20 -#define 
DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ - __stringify(QEDE_MINOR_VERSION) "." \ - __stringify(QEDE_REVISION_VERSION) "." \ - __stringify(QEDE_ENGINEERING_VERSION) - #define DRV_MODULE_SYM qede struct qede_stats_common { diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 1560ad3d9290..9c6aa6859646 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -625,13 +625,13 @@ static void qede_get_drvinfo(struct net_device *ndev, (edev->dev_info.common.mfw_rev >> 8) & 0xFF, edev->dev_info.common.mfw_rev & 0xFF); - if ((strlen(storm) + strlen(DRV_MODULE_VERSION) + strlen("[storm] ")) < + if ((strlen(storm) + strlen("[storm]")) < sizeof(info->version)) snprintf(info->version, sizeof(info->version), - "%s [storm %s]", DRV_MODULE_VERSION, storm); + "[storm %s]", storm); else snprintf(info->version, sizeof(info->version), - "%s %s", DRV_MODULE_VERSION, storm); + "%s", storm); if (edev->dev_info.common.mbi_version) { snprintf(mbi, ETHTOOL_FWVERS_LEN, "%d.%d.%d", diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 7c6064baeba2..d400e9b235bf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -39,12 +39,8 @@ #include "qede.h" #include "qede_ptp.h" -static char version[] = - "QLogic FastLinQ 4xxxx Ethernet Driver qede " DRV_MODULE_VERSION "\n"; - MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Ethernet Driver"); MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_MODULE_VERSION); static uint debug; module_param(debug, uint, 0); @@ -258,7 +254,7 @@ int __init qede_init(void) { int ret; - pr_info("qede_init: %s\n", version); + pr_info("qede init: QLogic FastLinQ 4xxxx Ethernet Driver qede\n"); qede_forced_speed_maps_init(); @@ -644,7 +640,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = qede_change_mtu, - .ndo_do_ioctl = qede_ioctl, + .ndo_eth_ioctl = qede_ioctl, .ndo_tx_timeout = qede_tx_timeout, #ifdef CONFIG_QED_SRIOV .ndo_set_vf_mac = qede_set_vf_mac, @@ -1157,10 +1153,6 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, /* Start the Slowpath-process */ memset(&sp_params, 0, sizeof(sp_params)); sp_params.int_mode = QED_INT_MODE_MSIX; - sp_params.drv_major = QEDE_MAJOR_VERSION; - sp_params.drv_minor = QEDE_MINOR_VERSION; - sp_params.drv_rev = QEDE_REVISION_VERSION; - sp_params.drv_eng = QEDE_ENGINEERING_VERSION; strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE); rc = qed_ops->common->slowpath_start(cdev, &sp_params); if (rc) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index d8f0863b3934..f6b6651decf3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1021,7 +1021,7 @@ clear_diag_irq: static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[]) { - unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00}; + static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00}; memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index ad655f0a4965..9015a38eaced 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -377,7 
+377,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = emac_change_mtu, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = emac_tx_timeout, .ndo_get_stats64 = emac_get_stats64, .ndo_set_features = emac_set_features, diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 47e9998b62f0..4b2eca5e08e2 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -954,7 +954,7 @@ static const struct net_device_ops r6040_netdev_ops = { .ndo_set_rx_mode = r6040_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = r6040_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = r6040_poll_controller, diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 9677e257e9a1..edc61906694f 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1869,7 +1869,7 @@ static const struct net_device_ops cp_netdev_ops = { .ndo_set_mac_address = cp_set_mac_address, .ndo_set_rx_mode = cp_set_rx_mode, .ndo_get_stats = cp_get_stats, - .ndo_do_ioctl = cp_ioctl, + .ndo_eth_ioctl = cp_ioctl, .ndo_start_xmit = cp_start_xmit, .ndo_tx_timeout = cp_tx_timeout, .ndo_set_features = cp_set_features, diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index f0608f050050..2e6923cc653e 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -932,7 +932,7 @@ static const struct net_device_ops rtl8139_netdev_ops = { .ndo_set_mac_address = rtl8139_set_mac_address, .ndo_start_xmit = rtl8139_start_xmit, .ndo_set_rx_mode = rtl8139_set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = rtl8139_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = rtl8139_poll_controller, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c7af5bc3b8af..fa2dab6980bb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4979,7 +4979,7 @@ static const struct net_device_ops rtl_netdev_ops = { .ndo_fix_features = rtl8169_fix_features, .ndo_set_features = rtl8169_set_features, .ndo_set_mac_address = rtl_set_mac_address, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = rtl_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = rtl8169_netpoll, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 805397088850..f4dfe9f71d06 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1872,7 +1872,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_get_stats = ravb_get_stats, .ndo_set_rx_mode = ravb_set_rx_mode, .ndo_tx_timeout = ravb_tx_timeout, - .ndo_do_ioctl = ravb_do_ioctl, + .ndo_eth_ioctl = ravb_do_ioctl, .ndo_change_mtu = ravb_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 840478692a37..6c8ba916d1a6 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c 
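
The recurring one-line change across these drivers moves the same handler from the .ndo_do_ioctl slot to .ndo_eth_ioctl; the core then calls it only for Ethernet-specific requests (the MII register and hardware-timestamping ioctls) rather than as a catch-all. A minimal sketch of the converted shape, with hypothetical foo_* names standing in for any of the drivers above:

#include <linux/netdevice.h>
#include <linux/phy.h>

static int foo_eth_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
{
	if (!netif_running(ndev))
		return -EINVAL;

	/* delegate SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG to the PHY layer */
	return phy_mii_ioctl(ndev->phydev, ifr, cmd);
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_eth_ioctl	= foo_eth_ioctl,	/* was .ndo_do_ioctl */
};
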
@@ -3141,7 +3141,7 @@ static const struct net_device_ops sh_eth_netdev_ops = { .ndo_get_stats = sh_eth_get_stats, .ndo_set_rx_mode = sh_eth_set_rx_mode, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, @@ -3157,7 +3157,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = { .ndo_vlan_rx_add_vid = sh_eth_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = sh_eth_vlan_rx_kill_vid, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 315a6e5c0f59..e75814a4654f 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -119,7 +119,8 @@ struct rocker_world_ops { int (*port_obj_fdb_del)(struct rocker_port *rocker_port, u16 vid, const unsigned char *addr); int (*port_master_linked)(struct rocker_port *rocker_port, - struct net_device *master); + struct net_device *master, + struct netlink_ext_ack *extack); int (*port_master_unlinked)(struct rocker_port *rocker_port, struct net_device *master); int (*port_neigh_update)(struct rocker_port *rocker_port, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index a46633606cae..53d407a5dbf7 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1670,13 +1670,14 @@ rocker_world_port_fdb_del(struct rocker_port *rocker_port, } static int rocker_world_port_master_linked(struct rocker_port *rocker_port, - struct net_device *master) + struct net_device *master, + struct netlink_ext_ack *extack) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_master_linked) return -EOPNOTSUPP; - return wops->port_master_linked(rocker_port, master); + return wops->port_master_linked(rocker_port, master, extack); } static int rocker_world_port_master_unlinked(struct rocker_port *rocker_port, @@ -3107,6 +3108,7 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, static int rocker_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct rocker_port *rocker_port; @@ -3123,7 +3125,8 @@ static int rocker_netdevice_event(struct notifier_block *unused, rocker_port = netdev_priv(dev); if (info->linking) { err = rocker_world_port_master_linked(rocker_port, - info->upper_dev); + info->upper_dev, + extack); if (err) netdev_warn(dev, "failed to reflect master linked (err %d)\n", err); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 967a634ee9ac..b82e169b7836 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2571,8 +2571,10 @@ static int ofdpa_port_obj_fdb_del(struct rocker_port *rocker_port, } static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, - struct net_device *bridge) + struct net_device *bridge, + struct netlink_ext_ack *extack) { + struct net_device *dev = ofdpa_port->dev; int err; /* Port is joining bridge, so the 
internal VLAN for the @@ -2592,13 +2594,21 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port->bridge_dev = bridge; - return ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0); + err = ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0); + if (err) + return err; + + return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL, + false, extack); } static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) { + struct net_device *dev = ofdpa_port->dev; int err; + switchdev_bridge_port_unoffload(dev, NULL, NULL, NULL); + err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0); if (err) return err; @@ -2637,13 +2647,14 @@ static int ofdpa_port_ovs_changed(struct ofdpa_port *ofdpa_port, } static int ofdpa_port_master_linked(struct rocker_port *rocker_port, - struct net_device *master) + struct net_device *master, + struct netlink_ext_ack *extack) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err = 0; if (netif_is_bridge_master(master)) - err = ofdpa_port_bridge_join(ofdpa_port, master); + err = ofdpa_port_bridge_join(ofdpa_port, master, extack); else if (netif_is_ovs_master(master)) err = ofdpa_port_ovs_changed(ofdpa_port, master); return err; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 090bcd2fb758..6781aa636d58 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1964,7 +1964,7 @@ static const struct net_device_ops sxgbe_netdev_ops = { .ndo_set_features = sxgbe_set_features, .ndo_set_rx_mode = sxgbe_set_rx_mode, .ndo_tx_timeout = sxgbe_tx_timeout, - .ndo_do_ioctl = sxgbe_ioctl, + .ndo_eth_ioctl = sxgbe_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sxgbe_poll_controller, #endif diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 37fcf2eb0741..a295e2621cf3 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -591,7 +591,7 @@ static const struct net_device_ops efx_netdev_ops = { .ndo_tx_timeout = efx_watchdog, .ndo_start_xmit = efx_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = efx_ioctl, + .ndo_eth_ioctl = efx_ioctl, .ndo_change_mtu = efx_change_mtu, .ndo_set_mac_address = efx_set_mac_address, .ndo_set_rx_mode = efx_set_rx_mode, diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 9ec752a43c75..c177ea0f301e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2219,7 +2219,7 @@ static const struct net_device_ops ef4_netdev_ops = { .ndo_tx_timeout = ef4_watchdog, .ndo_start_xmit = ef4_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ef4_ioctl, + .ndo_eth_ioctl = ef4_ioctl, .ndo_change_mtu = ef4_change_mtu, .ndo_set_mac_address = ef4_set_mac_address, .ndo_set_rx_mode = ef4_set_rx_mode, diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 2b29fd4cbdf4..062f7844c496 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -820,7 +820,7 @@ static const struct net_device_ops ioc3_netdev_ops = { .ndo_tx_timeout = ioc3_timeout, .ndo_get_stats = ioc3_get_stats, .ndo_set_rx_mode = ioc3_set_multicast_list, - .ndo_do_ioctl = ioc3_ioctl, + .ndo_eth_ioctl = ioc3_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ioc3_set_mac_address, }; diff --git a/drivers/net/ethernet/sgi/meth.c 
b/drivers/net/ethernet/sgi/meth.c index 0c396ecd3389..efce834d8ee6 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -812,7 +812,7 @@ static const struct net_device_ops meth_netdev_ops = { .ndo_open = meth_open, .ndo_stop = meth_release, .ndo_start_xmit = meth_tx, - .ndo_do_ioctl = meth_ioctl, + .ndo_eth_ioctl = meth_ioctl, .ndo_tx_timeout = meth_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 676b193833c0..3d1a18a01ce5 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1841,7 +1841,7 @@ static int sis190_mac_addr(struct net_device *dev, void *p) static const struct net_device_ops sis190_netdev_ops = { .ndo_open = sis190_open, .ndo_stop = sis190_close, - .ndo_do_ioctl = sis190_ioctl, + .ndo_eth_ioctl = sis190_ioctl, .ndo_start_xmit = sis190_start_xmit, .ndo_tx_timeout = sis190_tx_timeout, .ndo_set_rx_mode = sis190_set_rx_mode, diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index cff87de9178a..60a0c0e9ded2 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -404,7 +404,7 @@ static const struct net_device_ops sis900_netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = mii_ioctl, + .ndo_eth_ioctl = mii_ioctl, .ndo_tx_timeout = sis900_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sis900_poll, diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index c52a38df0e0d..72e42a868346 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -23,6 +23,7 @@ config SMC9194 tristate "SMC 9194 support" depends on ISA select CRC32 + select NETDEV_LEGACY_INIT help This is support for the SMC9xxx based Ethernet cards. 
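
The smc9194 hunk just below is an instance of a wider cleanup: old-style modules defined global init_module()/cleanup_module() entry points directly, while the modern form makes them static and registers them through module_init()/module_exit() (with ISA-probe drivers such as this one additionally gated behind NETDEV_LEGACY_INIT, as in the Kconfig change above). A minimal sketch of the converted form, with hypothetical foo_* names:

#include <linux/module.h>

static int __init foo_init_module(void)
{
	/* device probing / registration would happen here */
	return 0;
}
module_init(foo_init_module);

static void __exit foo_cleanup_module(void)
{
	/* unregister the device and release its resources here */
}
module_exit(foo_cleanup_module);
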
Choose this option if you have a DELL laptop with the docking station, or diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 51cd7dca91cd..44daf79a8f97 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -312,7 +312,7 @@ static const struct net_device_ops epic_netdev_ops = { .ndo_tx_timeout = epic_tx_timeout, .ndo_get_stats = epic_get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index bf7c8c8b1350..0ce403fa5f1a 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address"); MODULE_PARM_DESC(irq, "SMC 99194 IRQ number"); MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)"); -int __init init_module(void) +static int __init smc_init_module(void) { if (io == 0) printk(KERN_WARNING @@ -1518,13 +1518,15 @@ int __init init_module(void) devSMC9194 = smc_init(-1); return PTR_ERR_OR_ZERO(devSMC9194); } +module_init(smc_init_module); -void __exit cleanup_module(void) +static void __exit smc_cleanup_module(void) { unregister_netdev(devSMC9194); free_irq(devSMC9194->irq, devSMC9194); release_region(devSMC9194->base_addr, SMC_IO_EXTENT); free_netdev(devSMC9194); } +module_exit(smc_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index f2a50eb3c1e0..42fc37c7887a 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -294,7 +294,7 @@ static const struct net_device_ops smc_netdev_ops = { .ndo_tx_timeout = smc_tx_timeout, .ndo_set_config = s9k_config, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = smc_ioctl, + .ndo_eth_ioctl = smc_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 556a9790cdcf..199a97339280 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2148,7 +2148,7 @@ static const struct net_device_ops smsc911x_netdev_ops = { .ndo_start_xmit = smsc911x_hard_start_xmit, .ndo_get_stats = smsc911x_get_stats, .ndo_set_rx_mode = smsc911x_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = smsc911x_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index c1dab009415d..fdbd2a43e267 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1482,7 +1482,7 @@ static const struct net_device_ops smsc9420_netdev_ops = { .ndo_start_xmit = smsc9420_hard_start_xmit, .ndo_get_stats = smsc9420_get_stats, .ndo_set_rx_mode = smsc9420_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 20d148c019d8..d15f7b3a3f10 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ 
b/drivers/net/ethernet/socionext/netsec.c @@ -1831,7 +1831,7 @@ static const struct net_device_ops netsec_netdev_ops = { .ndo_set_features = netsec_netdev_set_features, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_xdp_xmit = netsec_xdp_xmit, .ndo_bpf = netsec_xdp, }; diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 5eb6bb4f7b6c..ae31ed93aaf0 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1543,7 +1543,7 @@ static const struct net_device_ops ave_netdev_ops = { .ndo_open = ave_open, .ndo_stop = ave_stop, .ndo_start_xmit = ave_start_xmit, - .ndo_do_ioctl = ave_ioctl, + .ndo_eth_ioctl = ave_ioctl, .ndo_set_rx_mode = ave_set_rx_mode, .ndo_get_stats64 = ave_get_stats64, .ndo_set_mac_address = ave_set_mac_address, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7b8404a21544..a2aa75cb184e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6451,7 +6451,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_set_features = stmmac_set_features, .ndo_set_rx_mode = stmmac_set_rx_mode, .ndo_tx_timeout = stmmac_tx_timeout, - .ndo_do_ioctl = stmmac_ioctl, + .ndo_eth_ioctl = stmmac_ioctl, .ndo_setup_tc = stmmac_setup_tc, .ndo_select_queue = stmmac_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 981685c88308..287ae4c538aa 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -4876,7 +4876,7 @@ static const struct net_device_ops cas_netdev_ops = { .ndo_start_xmit = cas_start_xmit, .ndo_get_stats = cas_get_stats, .ndo_set_rx_mode = cas_set_multicast, - .ndo_do_ioctl = cas_ioctl, + .ndo_eth_ioctl = cas_ioctl, .ndo_tx_timeout = cas_tx_timeout, .ndo_change_mtu = cas_change_mtu, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 860644d182ab..1501e8906be4 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9208,7 +9208,7 @@ static int niu_get_of_props(struct niu *np) else dp = pci_device_to_OF_node(np->pdev); - phy_type = of_get_property(dp, "phy-type", &prop_len); + phy_type = of_get_property(dp, "phy-type", NULL); if (!phy_type) { netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp); return -EINVAL; @@ -9242,12 +9242,12 @@ static int niu_get_of_props(struct niu *np) return -EINVAL; } - model = of_get_property(dp, "model", &prop_len); + model = of_get_property(dp, "model", NULL); if (model) strcpy(np->vpd.model, model); - if (of_find_property(dp, "hot-swappable-phy", &prop_len)) { + if (of_find_property(dp, "hot-swappable-phy", NULL)) { np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER | NIU_FLAGS_HOTPLUG_PHY); } @@ -9668,7 +9668,7 @@ static const struct net_device_ops niu_netdev_ops = { .ndo_set_rx_mode = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = niu_set_mac_addr, - .ndo_do_ioctl = niu_ioctl, + .ndo_eth_ioctl = niu_ioctl, .ndo_tx_timeout = niu_tx_timeout, .ndo_change_mtu = niu_change_mtu, }; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index cfb9e21b18b7..d72018a60c0f 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ 
b/drivers/net/ethernet/sun/sungem.c @@ -2831,7 +2831,7 @@ static const struct net_device_ops gem_netdev_ops = { .ndo_start_xmit = gem_start_xmit, .ndo_get_stats = gem_get_stats, .ndo_set_rx_mode = gem_set_multicast, - .ndo_do_ioctl = gem_ioctl, + .ndo_eth_ioctl = gem_ioctl, .ndo_tx_timeout = gem_tx_timeout, .ndo_change_mtu = gem_change_mtu, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 26d178f8616b..1db7104fef3a 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -933,7 +933,7 @@ static const struct net_device_ops xlgmac_netdev_ops = { .ndo_change_mtu = xlgmac_change_mtu, .ndo_set_mac_address = xlgmac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = xlgmac_ioctl, + .ndo_eth_ioctl = xlgmac_ioctl, .ndo_vlan_rx_add_vid = xlgmac_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = xlgmac_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index d054c6e83b1c..8f6abaec41d1 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -637,7 +637,8 @@ static int bdx_range_check(struct bdx_priv *priv, u32 offset) -EINVAL : 0; } -static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) +static int bdx_siocdevprivate(struct net_device *ndev, struct ifreq *ifr, + void __user *udata, int cmd) { struct bdx_priv *priv = netdev_priv(ndev); u32 data[3]; @@ -647,7 +648,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) DBG("jiffies=%ld cmd=%d\n", jiffies, cmd); if (cmd != SIOCDEVPRIVATE) { - error = copy_from_user(data, ifr->ifr_data, sizeof(data)); + error = copy_from_user(data, udata, sizeof(data)); if (error) { pr_err("can't copy from user\n"); RET(-EFAULT); @@ -669,7 +670,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) data[2] = READ_REG(priv, data[1]); DBG("read_reg(0x%x)=0x%x (dec %d)\n", data[1], data[2], data[2]); - error = copy_to_user(ifr->ifr_data, data, sizeof(data)); + error = copy_to_user(udata, data, sizeof(data)); if (error) RET(-EFAULT); break; @@ -688,15 +689,6 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) return 0; } -static int bdx_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) -{ - ENTER; - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - RET(bdx_ioctl_priv(ndev, ifr, cmd)); - else - RET(-EOPNOTSUPP); -} - /** * __bdx_vlan_rx_vid - private helper for adding/killing VLAN vid * @ndev: network device @@ -1860,7 +1852,7 @@ static const struct net_device_ops bdx_netdev_ops = { .ndo_stop = bdx_close, .ndo_start_xmit = bdx_tx_transmit, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = bdx_ioctl, + .ndo_siocdevprivate = bdx_siocdevprivate, .ndo_set_rx_mode = bdx_setmulti, .ndo_change_mtu = bdx_change_mtu, .ndo_set_mac_address = bdx_set_mac, diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 67a08cbba859..9066937db3b9 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -27,6 +27,7 @@ #include <linux/sys_soc.h> #include <linux/dma/ti-cppi5.h> #include <linux/dma/k3-udma-glue.h> +#include <net/switchdev.h> #include "cpsw_ale.h" #include "cpsw_sl.h" @@ -1479,7 +1480,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { 
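
The tehuti conversion above shows the .ndo_siocdevprivate pattern: the core hands the handler the user-space argument pointer separately, so the driver copies through 'data' instead of dereferencing ifr->ifr_data itself (which is not compat-safe). A condensed sketch with hypothetical foo_* names; the three-word argument buffer mirrors the bdx hunk above:

#include <linux/netdevice.h>
#include <linux/sockios.h>
#include <linux/uaccess.h>

static int foo_siocdevprivate(struct net_device *ndev, struct ifreq *ifr,
			      void __user *data, int cmd)
{
	u32 args[3];

	if (cmd < SIOCDEVPRIVATE || cmd > SIOCDEVPRIVATE + 15)
		return -EOPNOTSUPP;

	if (copy_from_user(args, data, sizeof(args)))
		return -EFAULT;

	/* ... act on args ... */

	return copy_to_user(data, args, sizeof(args)) ? -EFAULT : 0;
}
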
.ndo_tx_timeout = am65_cpsw_nuss_ndo_host_tx_timeout, .ndo_vlan_rx_add_vid = am65_cpsw_nuss_ndo_slave_add_vid, .ndo_vlan_rx_kill_vid = am65_cpsw_nuss_ndo_slave_kill_vid, - .ndo_do_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, + .ndo_eth_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, .ndo_get_devlink_port = am65_cpsw_ndo_get_devlink_port, }; @@ -2081,10 +2082,13 @@ bool am65_cpsw_port_dev_check(const struct net_device *ndev) return false; } -static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_device *br_ndev) +static int am65_cpsw_netdevice_port_link(struct net_device *ndev, + struct net_device *br_ndev, + struct netlink_ext_ack *extack) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + int err; if (!common->br_members) { common->hw_bridge_dev = br_ndev; @@ -2096,6 +2100,11 @@ static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_dev return -EOPNOTSUPP; } + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + return err; + common->br_members |= BIT(priv->port->port_id); am65_cpsw_port_offload_fwd_mark_update(common); @@ -2108,6 +2117,8 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev) struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL); + common->br_members &= ~BIT(priv->port->port_id); am65_cpsw_port_offload_fwd_mark_update(common); @@ -2120,6 +2131,7 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev) static int am65_cpsw_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; int ret = NOTIFY_DONE; @@ -2133,7 +2145,9 @@ static int am65_cpsw_netdevice_event(struct notifier_block *unused, if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) - ret = am65_cpsw_netdevice_port_link(ndev, info->upper_dev); + ret = am65_cpsw_netdevice_port_link(ndev, + info->upper_dev, + extack); else am65_cpsw_netdevice_port_unlink(ndev); } @@ -2388,21 +2402,6 @@ static const struct devlink_param am65_cpsw_devlink_params[] = { am65_cpsw_dl_switch_mode_set, NULL), }; -static void am65_cpsw_unregister_devlink_ports(struct am65_cpsw_common *common) -{ - struct devlink_port *dl_port; - struct am65_cpsw_port *port; - int i; - - for (i = 1; i <= common->port_num; i++) { - port = am65_common_get_port(common, i); - dl_port = &port->devlink_port; - - if (dl_port->registered) - devlink_port_unregister(dl_port); - } -} - static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) { struct devlink_port_attrs attrs = {}; @@ -2464,7 +2463,12 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) return ret; dl_port_unreg: - am65_cpsw_unregister_devlink_ports(common); + for (i = i - 1; i >= 1; i--) { + port = am65_common_get_port(common, i); + dl_port = &port->devlink_port; + + devlink_port_unregister(dl_port); + } dl_unreg: devlink_unregister(common->devlink); dl_free: @@ -2475,6 +2479,17 @@ dl_free: static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) { + struct devlink_port *dl_port; + struct am65_cpsw_port *port; + int i; + + for (i = 1; i <= common->port_num; i++) { + port = 
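
The am65-cpsw devlink error path above drops the helper that checked a 'registered' flag and instead unwinds directly: on failure it unregisters, newest first, only the ports registered so far, so no per-port state needs tracking. A sketch of that shape, assuming a hypothetical foo_common/foo_get_port() (devlink_port_register()/devlink_port_unregister() are the real API):

static int foo_register_devlink_ports(struct foo_common *common)
{
	int i, ret;

	for (i = 1; i <= common->port_num; i++) {
		ret = devlink_port_register(common->devlink,
					    &foo_get_port(common, i)->dl_port,
					    i);
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	/* walk back over only the ports that actually got registered */
	for (i = i - 1; i >= 1; i--)
		devlink_port_unregister(&foo_get_port(common, i)->dl_port);
	return ret;
}
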
am65_common_get_port(common, i); + dl_port = &port->devlink_port; + + devlink_port_unregister(dl_port); + } + if (!AM65_CPSW_IS_CPSW2G(common) && IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) { devlink_params_unpublish(common->devlink); @@ -2482,7 +2497,6 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) ARRAY_SIZE(am65_cpsw_devlink_params)); } - am65_cpsw_unregister_devlink_ports(common); devlink_unregister(common->devlink); devlink_free(common->devlink); } diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index c20715107075..02d4e51f7306 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1044,7 +1044,7 @@ static const struct net_device_ops cpmac_netdev_ops = { .ndo_start_xmit = cpmac_start_xmit, .ndo_tx_timeout = cpmac_tx_timeout, .ndo_set_rx_mode = cpmac_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index cbbd0f665796..abf9a2a6f7eb 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1159,7 +1159,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_do_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 57d279fdcc9f..ae167223e87f 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -28,6 +28,7 @@ #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/switchdev.h> #include <net/page_pool.h> #include <net/pkt_cls.h> #include <net/devlink.h> @@ -1127,7 +1128,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_do_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, @@ -1499,10 +1500,12 @@ static void cpsw_port_offload_fwd_mark_update(struct cpsw_common *cpsw) } static int cpsw_netdevice_port_link(struct net_device *ndev, - struct net_device *br_ndev) + struct net_device *br_ndev, + struct netlink_ext_ack *extack) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + int err; if (!cpsw->br_members) { cpsw->hw_bridge_dev = br_ndev; @@ -1514,6 +1517,11 @@ static int cpsw_netdevice_port_link(struct net_device *ndev, return -EOPNOTSUPP; } + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + return err; + cpsw->br_members |= BIT(priv->emac_port); cpsw_port_offload_fwd_mark_update(cpsw); @@ -1526,6 +1534,8 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev) struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL); + cpsw->br_members &= ~BIT(priv->emac_port); cpsw_port_offload_fwd_mark_update(cpsw); @@ -1538,6 +1548,7 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev) static int cpsw_netdevice_event(struct notifier_block *unused, 
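
The am65-cpsw and cpsw hunks around here pair every bridge join with an explicit switchdev bridge-port offload and every leave with the matching unoffload, threading the netlink extack from the CHANGEUPPER notifier through so failures reach user space. A condensed sketch of the notifier side, with hypothetical foo_* names:

#include <net/switchdev.h>

static int foo_netdevice_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_changeupper_info *info = ptr;
	int err = 0;

	if (event != NETDEV_CHANGEUPPER ||
	    !netif_is_bridge_master(info->upper_dev))
		return NOTIFY_DONE;

	if (info->linking)
		/* mark the port as offloaded; extack carries error text */
		err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL,
						    NULL, false, extack);
	else
		switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL);

	return notifier_from_errno(err);
}
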
unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; int ret = NOTIFY_DONE; @@ -1552,7 +1563,8 @@ static int cpsw_netdevice_event(struct notifier_block *unused, if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) ret = cpsw_netdevice_port_link(ndev, - info->upper_dev); + info->upper_dev, + extack); else cpsw_netdevice_port_unlink(ndev); } diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index c674e34b6839..637796670746 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1670,7 +1670,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_dev_xmit, .ndo_set_rx_mode = emac_dev_mcast_set, .ndo_set_mac_address = emac_dev_setmac_addr, - .ndo_do_ioctl = emac_devioctl, + .ndo_eth_ioctl = emac_devioctl, .ndo_tx_timeout = emac_dev_tx_timeout, .ndo_get_stats = emac_dev_getnetstats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 97942b0e3897..eda2961c0fe2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1944,7 +1944,7 @@ static const struct net_device_ops netcp_netdev_ops = { .ndo_stop = netcp_ndo_stop, .ndo_start_xmit = netcp_ndo_start_xmit, .ndo_set_rx_mode = netcp_set_rx_mode, - .ndo_do_ioctl = netcp_ndo_ioctl, + .ndo_eth_ioctl = netcp_ndo_ioctl, .ndo_get_stats64 = netcp_get_stats, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index e0cb713193ea..77c448ad67ce 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -749,7 +749,7 @@ static const struct net_device_ops tlan_netdev_ops = { .ndo_tx_timeout = tlan_tx_timeout, .ndo_get_stats = tlan_get_stats, .ndo_set_rx_mode = tlan_set_multicast_list, - .ndo_do_ioctl = tlan_ioctl, + .ndo_eth_ioctl = tlan_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 226a76633e65..087f0af56c50 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2214,7 +2214,7 @@ static const struct net_device_ops spider_net_ops = { .ndo_start_xmit = spider_net_xmit, .ndo_set_rx_mode = spider_net_set_multi, .ndo_set_mac_address = spider_net_set_mac, - .ndo_do_ioctl = spider_net_do_ioctl, + .ndo_eth_ioctl = spider_net_do_ioctl, .ndo_tx_timeout = spider_net_tx_timeout, .ndo_validate_addr = eth_validate_addr, /* HW VLAN */ diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index fedb2bf69261..52245ac60fc7 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -750,7 +750,7 @@ static const struct net_device_ops tc35815_netdev_ops = { .ndo_get_stats = tc35815_get_stats, .ndo_set_rx_mode = tc35815_set_multicast_list, .ndo_tx_timeout = tc35815_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c 
b/drivers/net/ethernet/tundra/tsi108_eth.c index c62f474b6d08..cf0917b29e30 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1538,7 +1538,7 @@ static const struct net_device_ops tsi108_netdev_ops = { .ndo_start_xmit = tsi108_send_packet, .ndo_set_rx_mode = tsi108_set_rx_mode, .ndo_get_stats = tsi108_get_stats, - .ndo_do_ioctl = tsi108_do_ioctl, + .ndo_eth_ioctl = tsi108_do_ioctl, .ndo_set_mac_address = tsi108_set_mac, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 73ca597ebd1b..961b623b7880 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -884,7 +884,7 @@ static const struct net_device_ops rhine_netdev_ops = { .ndo_set_rx_mode = rhine_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = rhine_tx_timeout, .ndo_vlan_rx_add_vid = rhine_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = rhine_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 88426b5e410b..278f49518d3f 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2637,7 +2637,7 @@ static const struct net_device_ops velocity_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_set_rx_mode = velocity_set_multi, .ndo_change_mtu = velocity_change_mtu, - .ndo_do_ioctl = velocity_ioctl, + .ndo_eth_ioctl = velocity_ioctl, .ndo_vlan_rx_add_vid = velocity_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = velocity_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60a4f79b8fa1..db1994fb51c5 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1237,7 +1237,7 @@ static const struct net_device_ops temac_netdev_ops = { .ndo_set_rx_mode = temac_set_multicast_list, .ndo_set_mac_address = temac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = temac_poll_controller, #endif diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 13cd799541aa..348c0ba5edcf 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1227,7 +1227,7 @@ static const struct net_device_ops axienet_netdev_ops = { .ndo_change_mtu = axienet_change_mtu, .ndo_set_mac_address = netdev_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = axienet_ioctl, + .ndo_eth_ioctl = axienet_ioctl, .ndo_set_rx_mode = axienet_set_multicast_list, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = axienet_poll_controller, diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index b06377fe7293..b780aad3550a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1263,7 +1263,7 @@ static const struct net_device_ops xemaclite_netdev_ops = { .ndo_start_xmit = xemaclite_send, .ndo_set_mac_address = xemaclite_set_mac_address, .ndo_tx_timeout = xemaclite_tx_timeout, - .ndo_do_ioctl = xemaclite_ioctl, + .ndo_eth_ioctl = 
xemaclite_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = xemaclite_poll_controller, #endif diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 4f6db6f5c272..ae611e46da6a 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -464,7 +464,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = do_start_xmit, .ndo_tx_timeout = xirc_tx_timeout, .ndo_set_config = do_config, - .ndo_do_ioctl = do_ioctl, + .ndo_eth_ioctl = do_ioctl, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 7ae754eadf22..ff50305d6e13 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1357,7 +1357,7 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_stop = eth_close, .ndo_start_xmit = eth_xmit, .ndo_set_rx_mode = eth_set_mcast_list, - .ndo_do_ioctl = eth_ioctl, + .ndo_eth_ioctl = eth_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 69c29a2ef95d..f62e98fada1a 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -70,6 +70,7 @@ static const char * const boot_msg = /* Include files */ #include <linux/capability.h> +#include <linux/compat.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -103,7 +104,8 @@ static struct net_device_stats *skfp_ctl_get_stats(struct net_device *dev); static void skfp_ctl_set_multicast_list(struct net_device *dev); static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev); static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr); -static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); static netdev_tx_t skfp_send_pkt(struct sk_buff *skb, struct net_device *dev); static void send_queued_packets(struct s_smc *smc); @@ -164,7 +166,7 @@ static const struct net_device_ops skfp_netdev_ops = { .ndo_get_stats = skfp_ctl_get_stats, .ndo_set_rx_mode = skfp_ctl_set_multicast_list, .ndo_set_mac_address = skfp_ctl_set_mac_address, - .ndo_do_ioctl = skfp_ioctl, + .ndo_siocdevprivate = skfp_siocdevprivate, }; /* @@ -932,9 +934,9 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr) /* - * ============== - * = skfp_ioctl = - * ============== + * ======================= + * = skfp_siocdevprivate = + * ======================= * * Overview: * @@ -954,16 +956,19 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr) */ -static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct s_smc *smc = netdev_priv(dev); skfddi_priv *lp = &smc->os; struct s_skfp_ioctl ioc; int status = 0; - if (copy_from_user(&ioc, rq->ifr_data, sizeof(struct s_skfp_ioctl))) + if (copy_from_user(&ioc, data, sizeof(struct s_skfp_ioctl))) return -EFAULT; + if (in_compat_syscall()) + return -EOPNOTSUPP; + switch (ioc.cmd) { case SKFP_GET_STATS: /* Get the driver statistics */ ioc.len = sizeof(lp->MacStat); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 
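
Alongside its .ndo_siocdevprivate conversion, the skfddi hunk above adds an in_compat_syscall() bail-out. The point is that private ioctl structures containing longs or pointers have a different layout for 32-bit callers, so until a translation is written the driver refuses compat requests outright. Sketched with hypothetical foo_* names:

#include <linux/compat.h>

static int foo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
			      void __user *data, int cmd)
{
	/* the request struct is not compat-safe; refuse 32-bit callers
	 * until a proper layout translation exists
	 */
	if (in_compat_syscall())
		return -EOPNOTSUPP;

	/* ... native handling via copy_from_user(..., data, ...) ... */
	return 0;
}
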
4435a1195194..775dcf4ebde5 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -1005,7 +1005,8 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { struct baycom_state *bc = netdev_priv(dev); struct hdlcdrv_ioctl hi; @@ -1013,7 +1014,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (cmd != SIOCDEVPRIVATE) return -ENOIOCTLCMD; - if (copy_from_user(&hi, ifr->ifr_data, sizeof(hi))) + if (copy_from_user(&hi, data, sizeof(hi))) return -EFAULT; switch (hi.cmd) { default: @@ -1104,7 +1105,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return HDLCDRV_PARMASK_IOBASE; } - if (copy_to_user(ifr->ifr_data, &hi, sizeof(hi))) + if (copy_to_user(data, &hi, sizeof(hi))) return -EFAULT; return 0; } @@ -1114,7 +1115,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static const struct net_device_ops baycom_netdev_ops = { .ndo_open = epp_open, .ndo_stop = epp_close, - .ndo_do_ioctl = baycom_ioctl, + .ndo_siocdevprivate = baycom_siocdevprivate, .ndo_start_xmit = baycom_send_packet, .ndo_set_mac_address = baycom_set_mac_address, }; diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 6a3dc7b3f28a..fd7da5bb1fa5 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -380,7 +380,7 @@ static int par96_close(struct net_device *dev) * ===================== hdlcdrv driver interface ========================= */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd); /* --------------------------------------------------------------------- */ @@ -408,7 +408,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd) { struct baycom_state *bc; @@ -428,7 +428,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, case HDLCDRVCTL_GETMODE: strcpy(hi->data.modename, bc->options ? 
"par96" : "picpar"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -440,7 +440,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, case HDLCDRVCTL_MODELIST: strcpy(hi->data.modename, "par96,picpar"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -449,7 +449,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, } - if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + if (copy_from_user(&bi, data, sizeof(bi))) return -EFAULT; switch (bi.cmd) { default: @@ -464,7 +464,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, #endif /* BAYCOM_DEBUG */ } - if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + if (copy_to_user(data, &bi, sizeof(bi))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 04bb409707fc..646f605e358f 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -462,7 +462,7 @@ static int ser12_close(struct net_device *dev) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd); /* --------------------------------------------------------------------- */ @@ -497,7 +497,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd) { struct baycom_state *bc; @@ -519,7 +519,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, sprintf(hi->data.modename, "ser%u", bc->baud / 100); if (bc->opt_dcd <= 0) strcat(hi->data.modename, (!bc->opt_dcd) ? 
"*" : "+"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, case HDLCDRVCTL_MODELIST: strcpy(hi->data.modename, "ser12,ser3,ser24"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -540,7 +540,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, } - if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + if (copy_from_user(&bi, data, sizeof(bi))) return -EFAULT; switch (bi.cmd) { default: @@ -555,7 +555,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, #endif /* BAYCOM_DEBUG */ } - if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + if (copy_to_user(data, &bi, sizeof(bi))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index a1acb3a47bdb..5d1ab4840753 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -521,7 +521,7 @@ static int ser12_close(struct net_device *dev) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd); /* --------------------------------------------------------------------- */ @@ -551,7 +551,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr) /* --------------------------------------------------------------------- */ -static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, +static int baycom_ioctl(struct net_device *dev, void __user *data, struct hdlcdrv_ioctl *hi, int cmd) { struct baycom_state *bc; @@ -573,7 +573,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, strcpy(hi->data.modename, "ser12"); if (bc->opt_dcd <= 0) strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? 
"@" : "+"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -585,7 +585,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, case HDLCDRVCTL_MODELIST: strcpy(hi->data.modename, "ser12"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -594,7 +594,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, } - if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + if (copy_from_user(&bi, data, sizeof(bi))) return -EFAULT; switch (bi.cmd) { default: @@ -609,7 +609,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, #endif /* BAYCOM_DEBUG */ } - if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + if (copy_to_user(data, &bi, sizeof(bi))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 0e623c2e8b2d..d967b0748773 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -314,9 +314,10 @@ static int bpq_set_mac_address(struct net_device *dev, void *addr) * source ethernet address (broadcast * or multicast: accept all) */ -static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - struct bpq_ethaddr __user *ethaddr = ifr->ifr_data; + struct bpq_ethaddr __user *ethaddr = data; struct bpqdev *bpq = netdev_priv(dev); struct bpq_req req; @@ -325,7 +326,7 @@ static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSBPQETHOPT: - if (copy_from_user(&req, ifr->ifr_data, sizeof(struct bpq_req))) + if (copy_from_user(&req, data, sizeof(struct bpq_req))) return -EFAULT; switch (req.cmd) { case SIOCGBPQETHPARAM: @@ -448,7 +449,7 @@ static const struct net_device_ops bpq_netdev_ops = { .ndo_stop = bpq_close, .ndo_start_xmit = bpq_xmit, .ndo_set_mac_address = bpq_set_mac_address, - .ndo_do_ioctl = bpq_ioctl, + .ndo_siocdevprivate = bpq_siocdevprivate, }; static void bpq_setup(struct net_device *dev) diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index c25c8c99c5c7..b50b7fafd8d6 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -225,7 +225,8 @@ static int read_scc_data(struct scc_priv *priv); static int scc_open(struct net_device *dev); static int scc_close(struct net_device *dev); -static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static int scc_send_packet(struct sk_buff *skb, struct net_device *dev); static int scc_set_mac_address(struct net_device *dev, void *sa); @@ -432,7 +433,7 @@ static const struct net_device_ops scc_netdev_ops = { .ndo_open = scc_open, .ndo_stop = scc_close, .ndo_start_xmit = scc_send_packet, - .ndo_do_ioctl = scc_ioctl, + .ndo_siocdevprivate = scc_siocdevprivate, .ndo_set_mac_address = scc_set_mac_address, }; @@ -881,15 +882,13 @@ static int scc_close(struct net_device *dev) } -static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { struct scc_priv *priv = dev->ml_priv; switch (cmd) { case SIOCGSCCPARAM: - if (copy_to_user - (ifr->ifr_data, &priv->param, - 
sizeof(struct scc_param))) + if (copy_to_user(data, &priv->param, sizeof(struct scc_param))) return -EFAULT; return 0; case SIOCSSCCPARAM: @@ -897,13 +896,12 @@ static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EPERM; if (netif_running(dev)) return -EAGAIN; - if (copy_from_user - (&priv->param, ifr->ifr_data, - sizeof(struct scc_param))) + if (copy_from_user(&priv->param, data, + sizeof(struct scc_param))) return -EFAULT; return 0; default: - return -EINVAL; + return -EOPNOTSUPP; } } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index cbaf1cdde7cb..5805cfc83854 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -483,23 +483,25 @@ static int hdlcdrv_close(struct net_device *dev) /* --------------------------------------------------------------------- */ -static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int hdlcdrv_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { struct hdlcdrv_state *s = netdev_priv(dev); struct hdlcdrv_ioctl bi; - if (cmd != SIOCDEVPRIVATE) { - if (s->ops && s->ops->ioctl) - return s->ops->ioctl(dev, ifr, &bi, cmd); + if (cmd != SIOCDEVPRIVATE) return -ENOIOCTLCMD; - } - if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + + if (in_compat_syscall()) /* to be implemented */ + return -ENOIOCTLCMD; + + if (copy_from_user(&bi, data, sizeof(bi))) return -EFAULT; switch (bi.cmd) { default: if (s->ops && s->ops->ioctl) - return s->ops->ioctl(dev, ifr, &bi, cmd); + return s->ops->ioctl(dev, data, &bi, cmd); return -ENOIOCTLCMD; case HDLCDRVCTL_GETCHANNELPAR: @@ -605,7 +607,7 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + if (copy_to_user(data, &bi, sizeof(bi))) return -EFAULT; return 0; @@ -617,7 +619,7 @@ static const struct net_device_ops hdlcdrv_netdev = { .ndo_open = hdlcdrv_open, .ndo_stop = hdlcdrv_close, .ndo_start_xmit = hdlcdrv_send_packet, - .ndo_do_ioctl = hdlcdrv_ioctl, + .ndo_siocdevprivate = hdlcdrv_siocdevprivate, .ndo_set_mac_address = hdlcdrv_set_mac_address, }; diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 3f1edd0526a4..e0bb131a33d7 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -210,7 +210,8 @@ static int scc_net_close(struct net_device *dev); static void scc_net_rx(struct scc_channel *scc, struct sk_buff *skb); static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev); -static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static int scc_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static int scc_net_set_mac_address(struct net_device *dev, void *addr); static struct net_device_stats * scc_net_get_stats(struct net_device *dev); @@ -1550,7 +1551,7 @@ static const struct net_device_ops scc_netdev_ops = { .ndo_start_xmit = scc_net_tx, .ndo_set_mac_address = scc_net_set_mac_address, .ndo_get_stats = scc_net_get_stats, - .ndo_do_ioctl = scc_net_ioctl, + .ndo_siocdevprivate = scc_net_siocdevprivate, }; /* ----> Initialize device <----- */ @@ -1703,7 +1704,8 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev) * SIOCSCCCAL - send calib. 
pattern arg: (struct scc_calibrate *) arg */ -static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int scc_net_siocdevprivate(struct net_device *dev, + struct ifreq *ifr, void __user *arg, int cmd) { struct scc_kiss_cmd kiss_cmd; struct scc_mem_config memcfg; @@ -1712,8 +1714,6 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct scc_channel *scc = (struct scc_channel *) dev->ml_priv; int chan; unsigned char device_name[IFNAMSIZ]; - void __user *arg = ifr->ifr_data; - if (!Driver_Initialized) { @@ -1722,6 +1722,9 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) int found = 1; if (!capable(CAP_SYS_RAWIO)) return -EPERM; + if (in_compat_syscall()) + return -EOPNOTSUPP; + if (!arg) return -EFAULT; if (Nchips >= SCC_MAXCHIPS) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index d4911041596c..6ddacbdb224b 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -920,15 +920,15 @@ static int yam_close(struct net_device *dev) /* --------------------------------------------------------------------- */ -static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { struct yam_port *yp = netdev_priv(dev); struct yamdrv_ioctl_cfg yi; struct yamdrv_ioctl_mcs *ym; int ioctl_cmd; - if (copy_from_user(&ioctl_cmd, ifr->ifr_data, sizeof(int))) - return -EFAULT; + if (copy_from_user(&ioctl_cmd, data, sizeof(int))) + return -EFAULT; if (yp->magic != YAM_MAGIC) return -EINVAL; @@ -947,8 +947,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCYAMSMCS: if (netif_running(dev)) return -EINVAL; /* Cannot change this parameter when up */ - ym = memdup_user(ifr->ifr_data, - sizeof(struct yamdrv_ioctl_mcs)); + ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); if (ym->cmd != SIOCYAMSMCS) @@ -965,8 +964,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCYAMSCFG: if (!capable(CAP_SYS_RAWIO)) return -EPERM; - if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) - return -EFAULT; + if (copy_from_user(&yi, data, sizeof(struct yamdrv_ioctl_cfg))) + return -EFAULT; if (yi.cmd != SIOCYAMSCFG) return -EINVAL; @@ -1045,8 +1044,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) yi.cfg.txtail = yp->txtail; yi.cfg.persist = yp->pers; yi.cfg.slottime = yp->slot; - if (copy_to_user(ifr->ifr_data, &yi, sizeof(struct yamdrv_ioctl_cfg))) - return -EFAULT; + if (copy_to_user(data, &yi, sizeof(struct yamdrv_ioctl_cfg))) + return -EFAULT; break; default: @@ -1074,7 +1073,7 @@ static const struct net_device_ops yam_netdev_ops = { .ndo_open = yam_open, .ndo_stop = yam_close, .ndo_start_xmit = yam_send_packet, - .ndo_do_ioctl = yam_ioctl, + .ndo_siocdevprivate = yam_siocdevprivate, .ndo_set_mac_address = yam_set_mac_address, }; diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 22010384c4a3..7661dbb31162 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -63,7 +63,7 @@ static const char version[] = static const struct net_device_ops rr_netdev_ops = { .ndo_open = rr_open, .ndo_stop = rr_close, - .ndo_do_ioctl = rr_ioctl, + .ndo_siocdevprivate = rr_siocdevprivate, .ndo_start_xmit = rr_start_xmit, .ndo_set_mac_address = hippi_mac_addr, }; @@ -1568,7 +1568,8 @@ out: } -static int 
rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) { struct rr_private *rrpriv; unsigned char *image, *oldimage; @@ -1603,7 +1604,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) error = -EFAULT; goto gf_out; } - error = copy_to_user(rq->ifr_data, image, EEPROM_BYTES); + error = copy_to_user(data, image, EEPROM_BYTES); if (error) error = -EFAULT; gf_out: @@ -1615,7 +1616,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EPERM; } - image = memdup_user(rq->ifr_data, EEPROM_BYTES); + image = memdup_user(data, EEPROM_BYTES); if (IS_ERR(image)) return PTR_ERR(image); @@ -1658,7 +1659,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return error; case SIOCRRID: - return put_user(0x52523032, (int __user *)rq->ifr_data); + return put_user(0x52523032, (int __user *)data); default: return error; } diff --git a/drivers/net/hippi/rrunner.h b/drivers/net/hippi/rrunner.h index 87533784604f..55377614e752 100644 --- a/drivers/net/hippi/rrunner.h +++ b/drivers/net/hippi/rrunner.h @@ -835,7 +835,8 @@ static int rr_open(struct net_device *dev); static netdev_tx_t rr_start_xmit(struct sk_buff *skb, struct net_device *dev); static int rr_close(struct net_device *dev); -static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); static unsigned int rr_read_eeprom(struct rr_private *rrpriv, unsigned long offset, unsigned char *buf, diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile index 506f8d5cd4ee..75435d40b920 100644 --- a/drivers/net/ipa/Makefile +++ b/drivers/net/ipa/Makefile @@ -1,6 +1,3 @@ -# Un-comment the next line if you want to validate configuration data -#ccflags-y += -DIPA_VALIDATE - obj-$(CONFIG_QCOM_IPA) += ipa.o ipa-y := ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \ diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 427c68b2ad8f..a2fcdb1abdb9 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id) gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id)); } -/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */ -static void gsi_irq_setup(struct gsi *gsi) -{ - /* Disable all interrupt types */ - gsi_irq_type_update(gsi, 0); - - /* Clear all type-specific interrupt masks */ - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); - - /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ - if (gsi->version > IPA_VERSION_3_1) { - u32 offset; - - /* These registers are in the non-adjusted address range */ - offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - } - - iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); -} - -/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ -static int gsi_ring_setup(struct gsi *gsi) -{ - struct device *dev = gsi->dev; - u32 count; - u32 val; - - if (gsi->version < IPA_VERSION_3_5_1) { - /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max 
*/ - gsi->channel_count = GSI_CHANNEL_COUNT_MAX; - gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; - - return 0; - } - - val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); - - count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero channels supported\n"); - return -EINVAL; - } - if (count > GSI_CHANNEL_COUNT_MAX) { - dev_warn(dev, "limiting to %u channels; hardware supports %u\n", - GSI_CHANNEL_COUNT_MAX, count); - count = GSI_CHANNEL_COUNT_MAX; - } - gsi->channel_count = count; - - count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero event rings supported\n"); - return -EINVAL; - } - if (count > GSI_EVT_RING_COUNT_MAX) { - dev_warn(dev, - "limiting to %u event rings; hardware supports %u\n", - GSI_EVT_RING_COUNT_MAX, count); - count = GSI_EVT_RING_COUNT_MAX; - } - gsi->evt_ring_count = count; - - return 0; -} - /* Event ring commands are performed one at a time. Their completion * is signaled by the event ring control GSI interrupt type, which is * only enabled when we issue an event ring command. Only the event @@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* All done! */ } -static int __gsi_channel_start(struct gsi_channel *channel, bool start) +static int __gsi_channel_start(struct gsi_channel *channel, bool resume) { struct gsi *gsi = channel->gsi; int ret; - if (!start) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (resume && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) napi_enable(&channel->napi); gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - ret = __gsi_channel_start(channel, true); + ret = __gsi_channel_start(channel, false); if (ret) { gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); napi_disable(&channel->napi); @@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) return ret; } -static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) +static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend) { struct gsi *gsi = channel->gsi; int ret; @@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) /* Wait for any underway transactions to complete before stopping. 
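With the bool parameter gone from gsi_channel_suspend() and gsi_channel_resume(), callers no longer encode the IPA-version policy themselves; GSI decides internally whether suspend means "stop the channel" (IPA v4.0+) or is a no-op at this layer. A hedged sketch of what a caller reduces to, mirroring the endpoint code reworked later in this patch (the dev argument and message text are illustrative):

static int example_suspend_channel(struct gsi *gsi, struct device *dev,
				   u32 channel_id)
{
	int ret;

	/* For IPA v4.0+ this stops the channel; before v4.0 GSI only
	 * quiesces in-flight transactions and returns 0. */
	ret = gsi_channel_suspend(gsi, channel_id);
	if (ret)
		dev_err(dev, "error %d suspending channel %u\n",
			ret, channel_id);

	return ret;
}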
*/ gsi_channel_trans_quiesce(channel); - if (!stop) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (suspend && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, true); + ret = __gsi_channel_stop(channel, false); if (ret) return ret; @@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) mutex_unlock(&gsi->mutex); } -/* Stop a STARTED channel for suspend (using stop if requested) */ -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) +/* Stop a started channel for suspend */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, stop); + ret = __gsi_channel_stop(channel, true); if (ret) return ret; @@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) return 0; } -/* Resume a suspended channel (starting will be requested if STOPPED) */ -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start) +/* Resume a suspended channel (starting if stopped) */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - return __gsi_channel_start(channel, start); + return __gsi_channel_start(channel, true); +} + +/* Prevent all GSI interrupts while suspended */ +void gsi_suspend(struct gsi *gsi) +{ + disable_irq(gsi->irq); +} + +/* Allow all GSI interrupts again when resuming */ +void gsi_resume(struct gsi *gsi) +{ + enable_irq(gsi->irq); } /** @@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) return IRQ_HANDLED; } +/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev) { - struct device *dev = &pdev->dev; - unsigned int irq; int ret; ret = platform_get_irq_byname(pdev, "gsi"); if (ret <= 0) return ret ? 
: -EINVAL; - irq = ret; - - ret = request_irq(irq, gsi_isr, 0, "gsi", gsi); - if (ret) { - dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret); - return ret; - } - gsi->irq = irq; + gsi->irq = ret; return 0; } -static void gsi_irq_exit(struct gsi *gsi) -{ - free_irq(gsi->irq, gsi); -} - /* Return the transaction associated with a transfer completion event */ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, struct gsi_event *event) @@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi) gsi_irq_disable(gsi); } +/* Turn off all GSI interrupts initially */ +static int gsi_irq_setup(struct gsi *gsi) +{ + int ret; + + /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ + iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + + /* Disable all interrupt types */ + gsi_irq_type_update(gsi, 0); + + /* Clear all type-specific interrupt masks */ + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); + + /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ + if (gsi->version > IPA_VERSION_3_1) { + u32 offset; + + /* These registers are in the non-adjusted address range */ + offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + } + + iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); + + ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi); + if (ret) + dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret); + + return ret; +} + +static void gsi_irq_teardown(struct gsi *gsi) +{ + free_irq(gsi->irq, gsi); +} + +/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ +static int gsi_ring_setup(struct gsi *gsi) +{ + struct device *dev = gsi->dev; + u32 count; + u32 val; + + if (gsi->version < IPA_VERSION_3_5_1) { + /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */ + gsi->channel_count = GSI_CHANNEL_COUNT_MAX; + gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; + + return 0; + } + + val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); + + count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero channels supported\n"); + return -EINVAL; + } + if (count > GSI_CHANNEL_COUNT_MAX) { + dev_warn(dev, "limiting to %u channels; hardware supports %u\n", + GSI_CHANNEL_COUNT_MAX, count); + count = GSI_CHANNEL_COUNT_MAX; + } + gsi->channel_count = count; + + count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero event rings supported\n"); + return -EINVAL; + } + if (count > GSI_EVT_RING_COUNT_MAX) { + dev_warn(dev, + "limiting to %u event rings; hardware supports %u\n", + GSI_EVT_RING_COUNT_MAX, count); + count = GSI_EVT_RING_COUNT_MAX; + } + gsi->evt_ring_count = count; + + return 0; +} + /* Setup function for GSI. 
GSI firmware must be loaded and initialized */ int gsi_setup(struct gsi *gsi) { @@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi) return -EIO; } - gsi_irq_setup(gsi); /* No matching teardown required */ + ret = gsi_irq_setup(gsi); + if (ret) + return ret; ret = gsi_ring_setup(gsi); /* No matching teardown required */ if (ret) - return ret; + goto err_irq_teardown; /* Initialize the error log */ iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET); - /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ - iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + ret = gsi_channel_setup(gsi); + if (ret) + goto err_irq_teardown; - return gsi_channel_setup(gsi); + return 0; + +err_irq_teardown: + gsi_irq_teardown(gsi); + + return ret; } /* Inverse of gsi_setup() */ void gsi_teardown(struct gsi *gsi) { gsi_channel_teardown(gsi); + gsi_irq_teardown(gsi); } /* Initialize a channel's event ring */ @@ -1964,7 +1990,6 @@ static void gsi_evt_ring_init(struct gsi *gsi) static bool gsi_channel_data_valid(struct gsi *gsi, const struct ipa_gsi_endpoint_data *data) { -#ifdef IPA_VALIDATION u32 channel_id = data->channel_id; struct device *dev = gsi->dev; @@ -2010,7 +2035,6 @@ static bool gsi_channel_data_valid(struct gsi *gsi, channel_id, data->channel.event_count); return false; } -#endif /* IPA_VALIDATION */ return true; } @@ -2206,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev, init_completion(&gsi->completion); - ret = gsi_irq_init(gsi, pdev); + ret = gsi_irq_init(gsi, pdev); /* No matching exit required */ if (ret) goto err_iounmap; ret = gsi_channel_init(gsi, count, data); if (ret) - goto err_irq_exit; + goto err_iounmap; mutex_init(&gsi->mutex); return 0; -err_irq_exit: - gsi_irq_exit(gsi); err_iounmap: iounmap(gsi->virt_raw); @@ -2231,7 +2253,6 @@ void gsi_exit(struct gsi *gsi) { mutex_destroy(&gsi->mutex); gsi_channel_exit(gsi); - gsi_irq_exit(gsi); iounmap(gsi->virt_raw); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 81cd7b07f6e1..88b80dc3db79 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id); */ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell); -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop); -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start); +/** + * gsi_suspend() - Prepare the GSI subsystem for suspend + * @gsi: GSI pointer + */ +void gsi_suspend(struct gsi *gsi); + +/** + * gsi_resume() - Resume the GSI subsystem following suspend + * @gsi: GSI pointer + */ +void gsi_resume(struct gsi *gsi); + +/** + * gsi_channel_suspend() - Suspend a GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to suspend + * + * For IPA v4.0+, suspend is implemented by stopping the channel. + */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id); + +/** + * gsi_channel_resume() - Resume a suspended GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to resume + * + * For IPA v4.0+, the stopped channel is started again. 
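Because gsi_irq_setup() can now fail (it requests the IRQ), gsi_setup() above grows the usual goto-based unwind ladder: each failing step branches to a label that tears down only what the earlier steps set up. The skeleton of that idiom, with hypothetical step names standing in for the GSI functions:

static int example_setup(struct example *e)
{
	int ret;

	ret = step_a(e);	/* has a matching step_a_teardown() */
	if (ret)
		return ret;

	ret = step_b(e);	/* no teardown required */
	if (ret)
		goto err_a_teardown;

	ret = step_c(e);
	if (ret)
		goto err_a_teardown;

	return 0;

err_a_teardown:
	step_a_teardown(e);

	return ret;
}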
+ */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id); /** * gsi_init() - Initialize the GSI subsystem diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 8c795a6a8598..1544564bc283 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -90,14 +90,12 @@ int gsi_trans_pool_init(struct gsi_trans_pool *pool, size_t size, u32 count, { void *virt; -#ifdef IPA_VALIDATE if (!size) return -EINVAL; if (count < max_alloc) return -EINVAL; if (!max_alloc) return -EINVAL; -#endif /* IPA_VALIDATE */ /* By allocating a few extra entries in our pool (one less * than the maximum number that will be requested in a @@ -140,14 +138,12 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, dma_addr_t addr; void *virt; -#ifdef IPA_VALIDATE if (!size) return -EINVAL; if (count < max_alloc) return -EINVAL; if (!max_alloc) return -EINVAL; -#endif /* IPA_VALIDATE */ /* Don't let allocations cross a power-of-two boundary */ size = __roundup_pow_of_two(size); @@ -188,8 +184,8 @@ static u32 gsi_trans_pool_alloc_common(struct gsi_trans_pool *pool, u32 count) { u32 offset; - /* assert(count > 0); */ - /* assert(count <= pool->max_alloc); */ + WARN_ON(!count); + WARN_ON(count > pool->max_alloc); /* Allocate from beginning if wrap would occur */ if (count > pool->count - pool->free) @@ -225,9 +221,10 @@ void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element) { void *end = pool->base + pool->count * pool->size; - /* assert(element >= pool->base); */ - /* assert(element < end); */ - /* assert(pool->max_alloc == 1); */ + WARN_ON(element < pool->base); + WARN_ON(element >= end); + WARN_ON(pool->max_alloc != 1); + element += pool->size; return element < end ? element : pool->base; @@ -332,7 +329,8 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, struct gsi_trans_info *trans_info; struct gsi_trans *trans; - /* assert(tre_count <= gsi_channel_trans_tre_max(gsi, channel_id)); */ + if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id))) + return NULL; trans_info = &channel->trans_info; @@ -408,7 +406,7 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size, u32 which = trans->used++; struct scatterlist *sg; - /* assert(which < trans->tre_count); */ + WARN_ON(which >= trans->tre_count); /* Commands are quite different from data transfer requests. * Their payloads come from a pool whose memory is allocated @@ -441,8 +439,10 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size, struct scatterlist *sg = &trans->sgl[0]; int ret; - /* assert(trans->tre_count == 1); */ - /* assert(!trans->used); */ + if (WARN_ON(trans->tre_count != 1)) + return -EINVAL; + if (WARN_ON(trans->used)) + return -EINVAL; sg_set_page(sg, page, size, offset); ret = dma_map_sg(trans->gsi->dev, sg, 1, trans->direction); @@ -461,8 +461,10 @@ int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb) u32 used; int ret; - /* assert(trans->tre_count == 1); */ - /* assert(!trans->used); */ + if (WARN_ON(trans->tre_count != 1)) + return -EINVAL; + if (WARN_ON(trans->used)) + return -EINVAL; /* skb->len will not be 0 (checked early) */ ret = skb_to_sgvec(skb, sg, 0, skb->len); @@ -550,7 +552,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) u32 avail; u32 i; - /* assert(trans->used > 0); */ + WARN_ON(!trans->used); /* Consume the entries. If we cross the end of the ring while * filling them we'll switch to the beginning to finish. 
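The gsi_trans.c hunks above replace commented-out assert() calls with checks that are live in every build. Two idioms are used: a bare WARN_ON() where execution can safely continue after the warning, and if (WARN_ON(...)) with an early error return where it cannot; WARN_ON() evaluates to the condition, so it works directly as an if expression. Condensed into a hypothetical function:

int example_add(struct example_trans *trans, u32 count)
{
	/* Diagnostic only: backtrace and taint, then carry on */
	WARN_ON(!count);

	/* Guard: warn *and* refuse to continue down an unsafe path */
	if (WARN_ON(count > trans->tre_count))
		return -EINVAL;

	/* ... */
	return 0;
}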
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 744406832a77..34152fe02963 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -28,19 +28,8 @@ struct ipa_smp2p; struct ipa_interrupt; /** - * enum ipa_flag - IPA state flags - * @IPA_FLAG_RESUMED: Whether resume from suspend has been signaled - * @IPA_FLAG_COUNT: Number of defined IPA flags - */ -enum ipa_flag { - IPA_FLAG_RESUMED, - IPA_FLAG_COUNT, /* Last; not a flag */ -}; - -/** * struct ipa - IPA information * @gsi: Embedded GSI structure - * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device * @completion: Used to signal pipeline clear transfer complete @@ -51,6 +40,7 @@ enum ipa_flag { * @table_addr: DMA address of filter/route table content * @table_virt: Virtual address of filter/route table content * @interrupt: IPA Interrupt information + * @uc_clocked: true if clock is active by proxy for microcontroller * @uc_loaded: true after microcontroller has reported it's ready * @reg_addr: DMA address used for IPA register access * @reg_virt: Virtual address used for IPA register access @@ -82,7 +72,6 @@ enum ipa_flag { */ struct ipa { struct gsi gsi; - DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; struct completion completion; @@ -95,6 +84,7 @@ struct ipa { __le64 *table_virt; struct ipa_interrupt *interrupt; + bool uc_clocked; bool uc_loaded; dma_addr_t reg_addr; diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 69ef6ea41e61..a67b6136e3c0 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -9,9 +9,12 @@ #include <linux/clk.h> #include <linux/device.h> #include <linux/interconnect.h> +#include <linux/pm.h> +#include <linux/bitops.h> #include "ipa.h" #include "ipa_clock.h" +#include "ipa_endpoint.h" #include "ipa_modem.h" #include "ipa_data.h" @@ -43,10 +46,21 @@ struct ipa_interconnect { }; /** + * enum ipa_power_flag - IPA power flags + * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled + * @IPA_POWER_FLAG_COUNT: Number of defined power flags + */ +enum ipa_power_flag { + IPA_POWER_FLAG_RESUMED, + IPA_POWER_FLAG_COUNT, /* Last; not a flag */ +}; + +/** * struct ipa_clock - IPA clocking information * @count: Clocking reference count * @mutex: Protects clock enable/disable * @core: IPA core clock + * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] * @interconnect: Interconnect array */ @@ -54,6 +68,7 @@ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; + DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; struct ipa_interconnect *interconnect; }; @@ -144,8 +159,12 @@ static int ipa_interconnect_enable(struct ipa *ipa) ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, interconnect->peak_bandwidth); - if (ret) + if (ret) { + dev_err(&ipa->pdev->dev, + "error %d enabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); goto out_unwind; + } interconnect++; } @@ -159,10 +178,11 @@ out_unwind: } /* To disable an interconnect, we just its bandwidth to 0 */ -static void ipa_interconnect_disable(struct ipa *ipa) +static int ipa_interconnect_disable(struct ipa *ipa) { struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; + struct device *dev = &ipa->pdev->dev; int result = 0; u32 count; int ret; @@ -172,13 +192,16 @@ static void ipa_interconnect_disable(struct ipa *ipa) while (count--) { 
interconnect--; ret = icc_set_bw(interconnect->path, 0, 0); - if (ret && !result) - result = ret; + if (ret) { + dev_err(dev, "error %d disabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); + /* Try to disable all; record only the first error */ + if (!result) + result = ret; + } } - if (result) - dev_err(&ipa->pdev->dev, - "error %d disabling IPA interconnects\n", ret); + return result; } /* Turn on IPA clocks, including interconnects */ @@ -191,8 +214,10 @@ static int ipa_clock_enable(struct ipa *ipa) return ret; ret = clk_prepare_enable(ipa->clock->core); - if (ret) - ipa_interconnect_disable(ipa); + if (ret) { + dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret); + (void)ipa_interconnect_disable(ipa); + } return ret; } @@ -201,7 +226,7 @@ static int ipa_clock_enable(struct ipa *ipa) static void ipa_clock_disable(struct ipa *ipa) { clk_disable_unprepare(ipa->clock->core); - ipa_interconnect_disable(ipa); + (void)ipa_interconnect_disable(ipa); } /* Get an IPA clock reference, but only if the reference count is @@ -238,13 +263,8 @@ void ipa_clock_get(struct ipa *ipa) goto out_mutex_unlock; ret = ipa_clock_enable(ipa); - if (ret) { - dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret); - goto out_mutex_unlock; - } - - refcount_set(&clock->count, 1); - + if (!ret) + refcount_set(&clock->count, 1); out_mutex_unlock: mutex_unlock(&clock->mutex); } @@ -271,6 +291,40 @@ u32 ipa_clock_rate(struct ipa *ipa) return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0; } +/** + * ipa_suspend_handler() - Handle the suspend IPA interrupt + * @ipa: IPA pointer + * @irq_id: IPA interrupt type (unused) + * + * If an RX endpoint is suspended, and the IPA has a packet destined for + * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP + * that it should resume the endpoint. If we get one of these interrupts + * we just wake up the system. + */ +static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +{ + /* Just report the event, and let system resume handle the rest. + * More than one endpoint could signal this; if so, ignore + * all but the first. + */ + if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags)) + pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); + + /* Acknowledge/clear the suspend interrupt on all endpoints */ + ipa_interrupt_suspend_clear_all(ipa->interrupt); +} + +void ipa_power_setup(struct ipa *ipa) +{ + ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, + ipa_suspend_handler); +} + +void ipa_power_teardown(struct ipa *ipa) +{ + ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); +} + /* Initialize IPA clocking */ struct ipa_clock * ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) @@ -329,3 +383,62 @@ void ipa_clock_exit(struct ipa_clock *clock) kfree(clock); clk_put(clk); } + +/** + * ipa_suspend() - Power management system suspend callback + * @dev: IPA device structure + * + * Return: Always returns zero + * + * Called by the PM framework when a system suspend operation is invoked. + * Suspends endpoints and releases the clock reference held to keep + * the IPA clock running until this point. 
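Note the shape of the reworked ipa_interconnect_disable(): it reports per-path failures but keeps iterating, so every interconnect gets a chance to be disabled, and only the first error is preserved as the return value. The idiom in isolation, as a hypothetical wrapper around the icc_set_bw() calls shown above:

static int disable_all(struct icc_path **paths, u32 count)
{
	int result = 0;

	while (count--) {
		int ret = icc_set_bw(paths[count], 0, 0);

		/* Record the first failure, but keep disabling the rest */
		if (ret && !result)
			result = ret;
	}

	return result;
}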
+ */ +static int ipa_suspend(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags); + ipa_endpoint_suspend(ipa); + gsi_suspend(&ipa->gsi); + } + + ipa_clock_put(ipa); + + return 0; +} + +/** + * ipa_resume() - Power management system resume callback + * @dev: IPA device structure + * + * Return: Always returns 0 + * + * Called by the PM framework when a system resume operation is invoked. + * Takes an IPA clock reference to keep the clock running until suspend, + * and resumes endpoints. + */ +static int ipa_resume(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* This clock reference will keep the IPA out of suspend + * until we get a power management suspend request. + */ + ipa_clock_get(ipa); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + gsi_resume(&ipa->gsi); + ipa_endpoint_resume(ipa); + } + + return 0; +} + +const struct dev_pm_ops ipa_pm_ops = { + .suspend = ipa_suspend, + .resume = ipa_resume, +}; diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h index 1fe634760e59..2a0f7ff3c9e6 100644 --- a/drivers/net/ipa/ipa_clock.h +++ b/drivers/net/ipa/ipa_clock.h @@ -11,6 +11,9 @@ struct device; struct ipa; struct ipa_clock_data; +/* IPA device power management function block */ +extern const struct dev_pm_ops ipa_pm_ops; + /** * ipa_clock_rate() - Return the current IPA core clock rate * @ipa: IPA structure @@ -20,6 +23,18 @@ struct ipa_clock_data; u32 ipa_clock_rate(struct ipa *ipa); /** + * ipa_power_setup() - Set up IPA power management + * @ipa: IPA pointer + */ +void ipa_power_setup(struct ipa *ipa); + +/** + * ipa_power_teardown() - Inverse of ipa_power_setup() + * @ipa: IPA pointer + */ +void ipa_power_teardown(struct ipa *ipa); + +/** * ipa_clock_init() - Initialize IPA clocking * @dev: IPA device * @data: Clock configuration data diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index af44ca41189e..cff51731195a 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -159,35 +159,49 @@ static void ipa_cmd_validate_build(void) BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK)); #undef TABLE_COUNT_MAX #undef TABLE_SIZE -} -#ifdef IPA_VALIDATE + /* Hashed and non-hashed fields are assumed to be the same size */ + BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK) != + field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK)); + BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) != + field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK)); + + /* Valid endpoint numbers must fit in the IP packet init command */ + BUILD_BUG_ON(field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK) < + IPA_ENDPOINT_MAX - 1); +} /* Validate a memory region holding a table */ -bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, - bool route, bool ipv6, bool hashed) +bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, bool route) { + u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); + u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK); + const char *table = route ? "route" : "filter"; struct device *dev = &ipa->pdev->dev; - u32 offset_max; - offset_max = hashed ? 
field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) - : field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); + /* Size must fit in the immediate command field that holds it */ + if (mem->size > size_max) { + dev_err(dev, "%s table region size too large\n", table); + dev_err(dev, " (0x%04x > 0x%04x)\n", + mem->size, size_max); + + return false; + } + + /* Offset must fit in the immediate command field that holds it */ if (mem->offset > offset_max || ipa->mem_offset > offset_max - mem->offset) { - dev_err(dev, "IPv%c %s%s table region offset too large\n", - ipv6 ? '6' : '4', hashed ? "hashed " : "", - route ? "route" : "filter"); + dev_err(dev, "%s table region offset too large\n", table); dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n", ipa->mem_offset, mem->offset, offset_max); return false; } + /* Entire memory range must fit within IPA-local memory */ if (mem->offset > ipa->mem_size || mem->size > ipa->mem_size - mem->offset) { - dev_err(dev, "IPv%c %s%s table region out of range\n", - ipv6 ? '6' : '4', hashed ? "hashed " : "", - route ? "route" : "filter"); + dev_err(dev, "%s table region out of range\n", table); dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n", mem->offset, mem->size, ipa->mem_size); @@ -331,7 +345,6 @@ bool ipa_cmd_data_valid(struct ipa *ipa) return true; } -#endif /* IPA_VALIDATE */ int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max) { @@ -522,9 +535,6 @@ static void ipa_cmd_ip_packet_init_add(struct gsi_trans *trans, u8 endpoint_id) union ipa_cmd_payload *cmd_payload; dma_addr_t payload_addr; - /* assert(endpoint_id < - field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK)); */ - cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr); payload = &cmd_payload->ip_packet_init; @@ -548,8 +558,9 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size, u16 flags; /* size and offset must fit in 16 bit fields */ - /* assert(size > 0 && size <= U16_MAX); */ - /* assert(offset <= U16_MAX && ipa->mem_offset <= U16_MAX - offset); */ + WARN_ON(!size); + WARN_ON(size > U16_MAX); + WARN_ON(offset > U16_MAX || ipa->mem_offset > U16_MAX - offset); offset += ipa->mem_offset; @@ -588,8 +599,6 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans) union ipa_cmd_payload *cmd_payload; dma_addr_t payload_addr; - /* assert(tag <= field_max(IP_PACKET_TAG_STATUS_TAG_FMASK)); */ - cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr); payload = &cmd_payload->ip_packet_tag_status; diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index b99262281f41..69cd085d427d 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -57,20 +57,16 @@ struct ipa_cmd_info { enum dma_data_direction direction; }; -#ifdef IPA_VALIDATE - /** * ipa_cmd_table_valid() - Validate a memory region holding a table * @ipa: - IPA pointer * @mem: - IPA memory region descriptor * @route: - Whether the region holds a route or filter table - * @ipv6: - Whether the table is for IPv6 or IPv4 - * @hashed: - Whether the table is hashed or non-hashed * * Return: true if region is valid, false otherwise */ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, - bool route, bool ipv6, bool hashed); + bool route); /** * ipa_cmd_data_valid() - Validate command-realted configuration is valid @@ -80,22 +76,6 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, */ bool ipa_cmd_data_valid(struct ipa *ipa); -#else /* !IPA_VALIDATE */ - -static inline bool ipa_cmd_table_valid(struct ipa *ipa, - const struct ipa_mem *mem, bool route, - bool ipv6, bool hashed) 
-{ - return true; -} - -static inline bool ipa_cmd_data_valid(struct ipa *ipa) -{ - return true; -} - -#endif /* !IPA_VALIDATE */ - /** * ipa_cmd_pool_init() - initialize command channel pools * @channel: AP->IPA command TX GSI channel pointer diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c index 9353efbd504f..782f67e3e079 100644 --- a/drivers/net/ipa/ipa_data-v4.11.c +++ b/drivers/net/ipa/ipa_data-v4.11.c @@ -105,6 +105,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -128,6 +129,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { @@ -368,18 +370,13 @@ static const struct ipa_mem_data ipa_mem_data = { static const struct ipa_interconnect_data ipa_interconnect_data[] = { { .name = "memory", - .peak_bandwidth = 465000, /* 465 MBps */ - .average_bandwidth = 80000, /* 80 MBps */ - }, - /* Average rate is unused for the next two interconnects */ - { - .name = "imem", - .peak_bandwidth = 68570, /* 68.57 MBps */ - .average_bandwidth = 80000, /* 80 MBps (unused?) */ + .peak_bandwidth = 600000, /* 600 MBps */ + .average_bandwidth = 150000, /* 150 MBps */ }, + /* Average rate is unused for the next interconnect */ { .name = "config", - .peak_bandwidth = 30000, /* 30 MBps */ + .peak_bandwidth = 74000, /* 74 MBps */ .average_bandwidth = 0, /* unused */ }, }; diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c index a99b6478fa3a..db6fda2fe43d 100644 --- a/drivers/net/ipa/ipa_data-v4.5.c +++ b/drivers/net/ipa/ipa_data-v4.5.c @@ -114,6 +114,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -137,6 +138,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c index 798d43e1eb13..8d83e14819e2 100644 --- a/drivers/net/ipa/ipa_data-v4.9.c +++ b/drivers/net/ipa/ipa_data-v4.9.c @@ -106,6 +106,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -129,6 +130,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { @@ -416,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = { /* Interconnect rates are in 1000 byte/second units */ static const struct ipa_interconnect_data ipa_interconnect_data[] = { { - .name = "ipa_to_llcc", + .name = "memory", .peak_bandwidth = 600000, /* 600 MBps */ .average_bandwidth = 150000, /* 150 MBps */ }, - { - .name = "llcc_to_ebi1", - .peak_bandwidth = 1804000, /* 1.804 GBps */ - .average_bandwidth = 150000, /* 150 MBps */ - }, /* Average rate is unused for the next interconnect */ { - .name = "appss_to_ipa", + .name = "config", .peak_bandwidth = 74000, /* 74 MBps */ 
.average_bandwidth = 0, /* unused */ }, diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index ab02669bae4e..08ee37ae2881 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -250,17 +250,18 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay) /* Suspend is not supported for IPA v4.0+. Delay doesn't work * correctly on IPA v4.2. - * - * if (endpoint->toward_ipa) - * assert(ipa->version != IPA_VERSION_4.2); - * else - * assert(ipa->version < IPA_VERSION_4_0); */ + if (endpoint->toward_ipa) + WARN_ON(ipa->version == IPA_VERSION_4_2); + else + WARN_ON(ipa->version >= IPA_VERSION_4_0); + mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK; val = ioread32(ipa->reg_virt + offset); - /* Don't bother if it's already in the requested state */ state = !!(val & mask); + + /* Don't bother if it's already in the requested state */ if (suspend_delay != state) { val ^= mask; iowrite32(val, ipa->reg_virt + offset); @@ -273,7 +274,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay) static void ipa_endpoint_program_delay(struct ipa_endpoint *endpoint, bool enable) { - /* assert(endpoint->toward_ipa); */ + WARN_ON(!endpoint->toward_ipa); /* Delay mode doesn't work properly for IPA v4.2 */ if (endpoint->ipa->version != IPA_VERSION_4_2) @@ -287,7 +288,8 @@ static bool ipa_endpoint_aggr_active(struct ipa_endpoint *endpoint) u32 offset; u32 val; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); + offset = ipa_reg_state_aggr_active_offset(ipa->version); val = ioread32(ipa->reg_virt + offset); @@ -299,7 +301,8 @@ static void ipa_endpoint_force_close(struct ipa_endpoint *endpoint) u32 mask = BIT(endpoint->endpoint_id); struct ipa *ipa = endpoint->ipa; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); + iowrite32(mask, ipa->reg_virt + IPA_REG_AGGR_FORCE_CLOSE_OFFSET); } @@ -338,7 +341,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable) if (endpoint->ipa->version >= IPA_VERSION_4_0) return enable; /* For IPA v4.0+, no change made */ - /* assert(!endpoint->toward_ipa); */ + WARN_ON(endpoint->toward_ipa); suspended = ipa_endpoint_init_ctrl(endpoint, enable); @@ -1156,7 +1159,8 @@ static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint, if (!endpoint->netdev) return false; - /* assert(len <= SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE-NET_SKB_PAD)); */ + WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD)); + skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE); if (skb) { /* Reserve the headroom and account for the data */ @@ -1583,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool stop_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1594,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) (void)ipa_endpoint_program_suspend(endpoint, true); } - /* Starting with IPA v4.0, endpoints are suspended by stopping the - * underlying GSI channel rather than using endpoint suspend mode. 
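ipa_endpoint_init_ctrl(), reworked above, is a read-modify-toggle: it samples the current state of one bit, flips it only when the requested state differs, and returns the previous state so callers can restore it later. The core of that pattern, abstracted into a hypothetical helper:

/* Return the previous state of @mask at @addr; flip it only if needed */
static bool example_toggle(void __iomem *addr, u32 mask, bool want)
{
	u32 val = ioread32(addr);
	bool state = !!(val & mask);

	if (want != state) {
		val ^= mask;	/* flips exactly the bits in @mask */
		iowrite32(val, addr);
	}

	return state;
}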
- */ - stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel); + ret = gsi_channel_suspend(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d suspending channel %u\n", ret, endpoint->channel_id); @@ -1608,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool start_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1617,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) if (!endpoint->toward_ipa) (void)ipa_endpoint_program_suspend(endpoint, false); - /* Starting with IPA v4.0, the underlying GSI channel must be - * restarted for resume. - */ - start_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel); + ret = gsi_channel_resume(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d resuming channel %u\n", ret, endpoint->channel_id); diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index c46df0b7c4e5..aa37f03f4557 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -74,21 +74,25 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id) iowrite32(mask, ipa->reg_virt + offset); } -/* Process all IPA interrupt types that have been signaled */ -static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt) +/* IPA IRQ handler is threaded */ +static irqreturn_t ipa_isr_thread(int irq, void *dev_id) { + struct ipa_interrupt *interrupt = dev_id; struct ipa *ipa = interrupt->ipa; u32 enabled = interrupt->enabled; + u32 pending; u32 offset; u32 mask; + ipa_clock_get(ipa); + /* The status register indicates which conditions are present, * including conditions whose interrupt is not enabled. Handle * only the enabled ones. 
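The interrupt rework above drops the hard (top-half) handler entirely: passing NULL as the handler to request_threaded_irq() makes the core run only the thread function, and IRQF_ONESHOT keeps the interrupt line masked until that thread returns. A minimal registration sketch (every name except the kernel APIs is hypothetical):

static int example_irq_config(struct device *dev, unsigned int irq,
			      void *dev_id)
{
	int ret;

	ret = request_threaded_irq(irq, NULL, example_isr_thread,
				   IRQF_ONESHOT, "example", dev_id);
	if (ret)
		dev_err(dev, "error %d requesting \"example\" IRQ\n", ret);

	return ret;
}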
*/ offset = ipa_reg_irq_stts_offset(ipa->version); - mask = ioread32(ipa->reg_virt + offset); - while ((mask &= enabled)) { + pending = ioread32(ipa->reg_virt + offset); + while ((mask = pending & enabled)) { do { u32 irq_id = __ffs(mask); @@ -96,43 +100,20 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt) ipa_interrupt_process(interrupt, irq_id); } while (mask); - mask = ioread32(ipa->reg_virt + offset); + pending = ioread32(ipa->reg_virt + offset); } -} -/* Threaded part of the IPA IRQ handler */ -static irqreturn_t ipa_isr_thread(int irq, void *dev_id) -{ - struct ipa_interrupt *interrupt = dev_id; - - ipa_clock_get(interrupt->ipa); - - ipa_interrupt_process_all(interrupt); - - ipa_clock_put(interrupt->ipa); - - return IRQ_HANDLED; -} + /* If any disabled interrupts are pending, clear them */ + if (pending) { + struct device *dev = &ipa->pdev->dev; -/* Hard part (i.e., "real" IRQ handler) of the IRQ handler */ -static irqreturn_t ipa_isr(int irq, void *dev_id) -{ - struct ipa_interrupt *interrupt = dev_id; - struct ipa *ipa = interrupt->ipa; - u32 offset; - u32 mask; - - offset = ipa_reg_irq_stts_offset(ipa->version); - mask = ioread32(ipa->reg_virt + offset); - if (mask & interrupt->enabled) - return IRQ_WAKE_THREAD; - - /* Nothing in the mask was supposed to cause an interrupt */ - offset = ipa_reg_irq_clr_offset(ipa->version); - iowrite32(mask, ipa->reg_virt + offset); + dev_dbg(dev, "clearing disabled IPA interrupts 0x%08x\n", + pending); + offset = ipa_reg_irq_clr_offset(ipa->version); + iowrite32(pending, ipa->reg_virt + offset); + } - dev_err(&ipa->pdev->dev, "%s: unexpected interrupt, mask 0x%08x\n", - __func__, mask); + ipa_clock_put(ipa); return IRQ_HANDLED; } @@ -146,7 +127,7 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 offset; u32 val; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); /* IPA version 3.0 does not support TX_SUSPEND interrupt control */ if (ipa->version == IPA_VERSION_3_0) @@ -206,7 +187,8 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt, struct ipa *ipa = interrupt->ipa; u32 offset; - /* assert(ipa_irq < IPA_IRQ_COUNT); */ + WARN_ON(ipa_irq >= IPA_IRQ_COUNT); + interrupt->handler[ipa_irq] = handler; /* Update the IPA interrupt mask to enable it */ @@ -222,7 +204,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) struct ipa *ipa = interrupt->ipa; u32 offset; - /* assert(ipa_irq < IPA_IRQ_COUNT); */ + WARN_ON(ipa_irq >= IPA_IRQ_COUNT); + /* Update the IPA interrupt mask to disable it */ interrupt->enabled &= ~BIT(ipa_irq); offset = ipa_reg_irq_en_offset(ipa->version); @@ -231,8 +214,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) interrupt->handler[ipa_irq] = NULL; } -/* Set up the IPA interrupt framework */ -struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa) +/* Configure the IPA interrupt framework */ +struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) { struct device *dev = &ipa->pdev->dev; struct ipa_interrupt *interrupt; @@ -258,7 +241,7 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa) offset = ipa_reg_irq_en_offset(ipa->version); iowrite32(0, ipa->reg_virt + offset); - ret = request_threaded_irq(irq, ipa_isr, ipa_isr_thread, IRQF_ONESHOT, + ret = request_threaded_irq(irq, NULL, ipa_isr_thread, IRQF_ONESHOT, "ipa", interrupt); if (ret) { dev_err(dev, "error %d requesting \"ipa\" IRQ\n", ret); @@ -281,8 +264,8 @@ err_kfree: return ERR_PTR(ret); } -/* Tear down the 
IPA interrupt framework */ -void ipa_interrupt_teardown(struct ipa_interrupt *interrupt) +/* Inverse of ipa_interrupt_config() */ +void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt) { struct device *dev = &interrupt->ipa->pdev->dev; int ret; diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index d5c486a6800d..231390cea52a 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -86,17 +86,17 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt); void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt); /** - * ipa_interrupt_setup() - Set up the IPA interrupt framework + * ipa_interrupt_config() - Configure the IPA interrupt framework * @ipa: IPA pointer * * Return: Pointer to IPA SMP2P info, or a pointer-coded error */ -struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa); +struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa); /** - * ipa_interrupt_teardown() - Tear down the IPA interrupt framework + * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config() * @interrupt: IPA interrupt structure */ -void ipa_interrupt_teardown(struct ipa_interrupt *interrupt); +void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt); #endif /* _IPA_INTERRUPT_H_ */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 9810c61a0320..25bbb456e007 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -80,29 +80,6 @@ #define IPA_XO_CLOCK_DIVIDER 192 /* 1 is subtracted where used */ /** - * ipa_suspend_handler() - Handle the suspend IPA interrupt - * @ipa: IPA pointer - * @irq_id: IPA interrupt type (unused) - * - * If an RX endpoint is in suspend state, and the IPA has a packet - * destined for that endpoint, the IPA generates a SUSPEND interrupt - * to inform the AP that it should resume the endpoint. If we get - * one of these interrupts we just resume everything. - */ -static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) -{ - /* Just report the event, and let system resume handle the rest. - * More than one endpoint could signal this; if so, ignore - * all but the first. - */ - if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags)) - pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); - - /* Acknowledge/clear the suspend interrupt on all endpoints */ - ipa_interrupt_suspend_clear_all(ipa->interrupt); -} - -/** * ipa_setup() - Set up IPA hardware * @ipa: IPA pointer * @@ -124,19 +101,11 @@ int ipa_setup(struct ipa *ipa) if (ret) return ret; - ipa->interrupt = ipa_interrupt_setup(ipa); - if (IS_ERR(ipa->interrupt)) { - ret = PTR_ERR(ipa->interrupt); - goto err_gsi_teardown; - } - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, - ipa_suspend_handler); - - ipa_uc_setup(ipa); + ipa_power_setup(ipa); ret = device_init_wakeup(dev, true); if (ret) - goto err_uc_teardown; + goto err_gsi_teardown; ipa_endpoint_setup(ipa); @@ -167,7 +136,7 @@ int ipa_setup(struct ipa *ipa) ipa_endpoint_default_route_set(ipa, exception_endpoint->endpoint_id); /* We're all set. 
Now prepare for communication with the modem */ - ret = ipa_modem_setup(ipa); + ret = ipa_qmi_setup(ipa); if (ret) goto err_default_route_clear; @@ -184,11 +153,8 @@ err_command_disable: ipa_endpoint_disable_one(command_endpoint); err_endpoint_teardown: ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(dev, false); -err_uc_teardown: - ipa_uc_teardown(ipa); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); - ipa_interrupt_teardown(ipa->interrupt); err_gsi_teardown: gsi_teardown(&ipa->gsi); @@ -204,17 +170,18 @@ static void ipa_teardown(struct ipa *ipa) struct ipa_endpoint *exception_endpoint; struct ipa_endpoint *command_endpoint; - ipa_modem_teardown(ipa); + /* We're going to tear everything down, as if setup never completed */ + ipa->setup_complete = false; + + ipa_qmi_teardown(ipa); ipa_endpoint_default_route_clear(ipa); exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; ipa_endpoint_disable_one(exception_endpoint); command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]; ipa_endpoint_disable_one(command_endpoint); ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(&ipa->pdev->dev, false); - ipa_uc_teardown(ipa); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); - ipa_interrupt_teardown(ipa->interrupt); gsi_teardown(&ipa->gsi); } @@ -253,9 +220,6 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data) const struct ipa_qsb_data *data1; u32 val; - /* assert(data->qsb_count > 0); */ - /* assert(data->qsb_count < 3); */ - /* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */ data0 = &data->qsb_data[IPA_QSB_MASTER_DDR]; if (data->qsb_count > 1) @@ -293,7 +257,7 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data) */ static u32 ipa_aggr_granularity_val(u32 usec) { - /* assert(usec != 0); */ + WARN_ON(!usec); return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1; } @@ -471,31 +435,44 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) ipa_hardware_config(ipa, data); - ret = ipa_endpoint_config(ipa); + ret = ipa_mem_config(ipa); if (ret) goto err_hardware_deconfig; - ret = ipa_mem_config(ipa); + ipa->interrupt = ipa_interrupt_config(ipa); + if (IS_ERR(ipa->interrupt)) { + ret = PTR_ERR(ipa->interrupt); + ipa->interrupt = NULL; + goto err_mem_deconfig; + } + + ipa_uc_config(ipa); + + ret = ipa_endpoint_config(ipa); if (ret) - goto err_endpoint_deconfig; + goto err_uc_deconfig; ipa_table_config(ipa); /* No deconfig required */ /* Assign resource limitation to each group; no deconfig required */ ret = ipa_resource_config(ipa, data->resource_data); if (ret) - goto err_mem_deconfig; + goto err_endpoint_deconfig; ret = ipa_modem_config(ipa); if (ret) - goto err_mem_deconfig; + goto err_endpoint_deconfig; return 0; -err_mem_deconfig: - ipa_mem_deconfig(ipa); err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); +err_uc_deconfig: + ipa_uc_deconfig(ipa); + ipa_interrupt_deconfig(ipa->interrupt); + ipa->interrupt = NULL; +err_mem_deconfig: + ipa_mem_deconfig(ipa); err_hardware_deconfig: ipa_hardware_deconfig(ipa); ipa_clock_put(ipa); @@ -510,8 +487,11 @@ err_hardware_deconfig: static void ipa_deconfig(struct ipa *ipa) { ipa_modem_deconfig(ipa); - ipa_mem_deconfig(ipa); ipa_endpoint_deconfig(ipa); + ipa_uc_deconfig(ipa); + ipa_interrupt_deconfig(ipa->interrupt); + ipa->interrupt = NULL; + ipa_mem_deconfig(ipa); ipa_hardware_deconfig(ipa); ipa_clock_put(ipa); } @@ -612,7 +592,6 @@ MODULE_DEVICE_TABLE(of, ipa_match); * */ static void 
ipa_validate_build(void) { -#ifdef IPA_VALIDATE /* At one time we assumed a 64-bit build, allowing some do_div() * calls to be replaced by simple division or modulo operations. * We currently only perform divide and modulo operations on u32, @@ -646,7 +625,6 @@ static void ipa_validate_build(void) BUILD_BUG_ON(!ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY)); BUILD_BUG_ON(ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY) > field_max(AGGR_GRANULARITY_FMASK)); -#endif /* IPA_VALIDATE */ } static bool ipa_version_valid(enum ipa_version version) @@ -771,9 +749,12 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_table_exit; + /* The clock needs to be active for config and setup */ + ipa_clock_get(ipa); + ret = ipa_config(ipa, data); if (ret) - goto err_modem_exit; + goto err_clock_put; /* Error */ dev_info(dev, "IPA driver initialized"); @@ -782,7 +763,7 @@ static int ipa_probe(struct platform_device *pdev) * we're done here. */ if (modem_init) - return 0; + goto out_clock_put; /* Done; no error */ /* Otherwise we need to load the firmware and have Trust Zone validate * and install it. If that succeeds we can proceed with setup. @@ -795,11 +776,15 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_deconfig; +out_clock_put: + ipa_clock_put(ipa); + return 0; err_deconfig: ipa_deconfig(ipa); -err_modem_exit: +err_clock_put: + ipa_clock_put(ipa); ipa_modem_exit(ipa); err_table_exit: ipa_table_exit(ipa); @@ -825,6 +810,8 @@ static int ipa_remove(struct platform_device *pdev) struct ipa_clock *clock = ipa->clock; int ret; + ipa_clock_get(ipa); + if (ipa->setup_complete) { ret = ipa_modem_stop(ipa); /* If starting or stopping is in progress, try once more */ @@ -839,6 +826,9 @@ static int ipa_remove(struct platform_device *pdev) } ipa_deconfig(ipa); + + ipa_clock_put(ipa); + ipa_modem_exit(ipa); ipa_table_exit(ipa); ipa_endpoint_exit(ipa); @@ -860,62 +850,6 @@ static void ipa_shutdown(struct platform_device *pdev) dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret); } -/** - * ipa_suspend() - Power management system suspend callback - * @dev: IPA device structure - * - * Return: Always returns zero - * - * Called by the PM framework when a system suspend operation is invoked. - * Suspends endpoints and releases the clock reference held to keep - * the IPA clock running until this point. - */ -static int ipa_suspend(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* When a suspended RX endpoint has a packet ready to receive, we - * get an IPA SUSPEND interrupt. We trigger a system resume in - * that case, but only on the first such interrupt since suspend. - */ - __clear_bit(IPA_FLAG_RESUMED, ipa->flags); - - ipa_endpoint_suspend(ipa); - - ipa_clock_put(ipa); - - return 0; -} - -/** - * ipa_resume() - Power management system resume callback - * @dev: IPA device structure - * - * Return: Always returns 0 - * - * Called by the PM framework when a system resume operation is invoked. - * Takes an IPA clock reference to keep the clock running until suspend, - * and resumes endpoints. - */ -static int ipa_resume(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* This clock reference will keep the IPA out of suspend - * until we get a power management suspend request. 
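With ipa_suspend(), ipa_resume(), and the ipa_pm_ops table relocated to ipa_clock.c earlier in this patch, ipa_main.c only needs the exported symbol. The platform-driver hookup presumably keeps the usual shape; a sketch in which every name except ipa_pm_ops is a placeholder:

static struct platform_driver example_driver = {
	.probe		= example_probe,
	.remove		= example_remove,
	.driver		= {
		.name		= "example",
		.pm		= &ipa_pm_ops,	/* now defined in ipa_clock.c */
		.of_match_table	= example_match,
	},
};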
- */ - ipa_clock_get(ipa); - - ipa_endpoint_resume(ipa); - - return 0; -} - -static const struct dev_pm_ops ipa_pm_ops = { - .suspend = ipa_suspend, - .resume = ipa_resume, -}; - static const struct attribute_group *ipa_attribute_groups[] = { &ipa_attribute_group, &ipa_feature_attribute_group, diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index af9aedbde717..ad4019e8016e 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -19,6 +19,8 @@ #include "ipa_modem.h" #include "ipa_smp2p.h" #include "ipa_qmi.h" +#include "ipa_uc.h" +#include "ipa_clock.h" #define IPA_NETDEV_NAME "rmnet_ipa%d" #define IPA_NETDEV_TAILROOM 0 /* for padding by mux layer */ @@ -43,9 +45,12 @@ static int ipa_open(struct net_device *netdev) struct ipa *ipa = priv->ipa; int ret; + ipa_clock_get(ipa); + ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); if (ret) - return ret; + goto err_clock_put; + ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); if (ret) goto err_disable_tx; @@ -56,6 +61,8 @@ static int ipa_open(struct net_device *netdev) err_disable_tx: ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); +err_clock_put: + ipa_clock_put(ipa); return ret; } @@ -71,6 +78,8 @@ static int ipa_stop(struct net_device *netdev) ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); + ipa_clock_put(ipa); + return 0; } @@ -169,6 +178,9 @@ void ipa_modem_suspend(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + netif_stop_queue(netdev); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -185,6 +197,9 @@ void ipa_modem_resume(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -216,13 +231,15 @@ int ipa_modem_start(struct ipa *ipa) SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + ipa->modem_netdev = netdev; ret = register_netdev(netdev); - if (!ret) { - ipa->modem_netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; - } else { + if (ret) { + ipa->modem_netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; free_netdev(netdev); } @@ -256,13 +273,15 @@ int ipa_modem_stop(struct ipa *ipa) /* Prevent the modem from triggering a call to ipa_setup() */ ipa_smp2p_disable(ipa); - /* Stop the queue and disable the endpoints if it's open */ + /* Clean up the netdev and endpoints if it was started */ if (netdev) { - (void)ipa_stop(netdev); + /* If it was opened, stop it first */ + if (netdev->flags & IFF_UP) + (void)ipa_stop(netdev); + unregister_netdev(netdev); + ipa->modem_netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; - ipa->modem_netdev = NULL; - unregister_netdev(netdev); free_netdev(netdev); } @@ -278,6 +297,8 @@ static void ipa_modem_crashed(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; int ret; + 
ipa_clock_get(ipa); + ipa_endpoint_modem_pause_all(ipa, true); ipa_endpoint_modem_hol_block_clear_all(ipa); @@ -302,6 +323,8 @@ static void ipa_modem_crashed(struct ipa *ipa) ret = ipa_mem_zero_modem(ipa); if (ret) dev_err(dev, "error %d zeroing modem memory regions\n", ret); + + ipa_clock_put(ipa); } static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, @@ -314,6 +337,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, switch (action) { case QCOM_SSR_BEFORE_POWERUP: dev_info(dev, "received modem starting event\n"); + ipa_uc_clock(ipa); ipa_smp2p_notify_reset(ipa); break; @@ -377,13 +401,3 @@ void ipa_modem_deconfig(struct ipa *ipa) ipa->notifier = NULL; memset(&ipa->nb, 0, sizeof(ipa->nb)); } - -int ipa_modem_setup(struct ipa *ipa) -{ - return ipa_qmi_setup(ipa); -} - -void ipa_modem_teardown(struct ipa *ipa) -{ - ipa_qmi_teardown(ipa); -} diff --git a/drivers/net/ipa/ipa_modem.h b/drivers/net/ipa/ipa_modem.h index 2de3e216d1d4..5e6e3d234454 100644 --- a/drivers/net/ipa/ipa_modem.h +++ b/drivers/net/ipa/ipa_modem.h @@ -7,7 +7,6 @@ #define _IPA_MODEM_H_ struct ipa; -struct ipa_endpoint; struct net_device; struct sk_buff; @@ -25,7 +24,4 @@ void ipa_modem_exit(struct ipa *ipa); int ipa_modem_config(struct ipa *ipa); void ipa_modem_deconfig(struct ipa *ipa); -int ipa_modem_setup(struct ipa *ipa); -void ipa_modem_teardown(struct ipa *ipa); - #endif /* _IPA_MODEM_H_ */ diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index 4661105ce7ab..90f3aec55b36 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -467,10 +467,7 @@ static const struct qmi_ops ipa_client_ops = { .new_server = ipa_client_new_server, }; -/* This is called by ipa_setup(). We can be informed via remoteproc that - * the modem has shut down, in which case this function will be called - * again to prepare for it coming back up again. - */ +/* Set up for QMI message exchange */ int ipa_qmi_setup(struct ipa *ipa) { struct ipa_qmi *ipa_qmi = &ipa->qmi; @@ -526,6 +523,7 @@ err_server_handle_release: return ret; } +/* Tear down IPA QMI handles */ void ipa_qmi_teardown(struct ipa *ipa) { cancel_work_sync(&ipa->qmi.init_driver_work); diff --git a/drivers/net/ipa/ipa_qmi.h b/drivers/net/ipa/ipa_qmi.h index b6f2055d35a6..856ef629ccc8 100644 --- a/drivers/net/ipa/ipa_qmi.h +++ b/drivers/net/ipa/ipa_qmi.h @@ -39,7 +39,26 @@ struct ipa_qmi { bool indication_sent; }; +/** + * ipa_qmi_setup() - Set up for QMI message exchange + * @ipa: IPA pointer + * + * This is called at the end of ipa_setup(), to prepare for the exchange + * of QMI messages that perform a "handshake" between the AP and modem. + * When the modem QMI server announces its presence, an AP request message + * supplies operating parameters to be used by the modem, and the modem + * acknowledges receipt of those parameters. The modem will not touch the + * IPA hardware until this handshake is complete. + * + * If the modem crashes (or shuts down) a new handshake begins when the + * modem's QMI server is started again.
+ */ int ipa_qmi_setup(struct ipa *ipa); + +/** + * ipa_qmi_teardown() - Tear down IPA QMI handles + * @ipa: IPA pointer + */ void ipa_qmi_teardown(struct ipa *ipa); #endif /* !_IPA_QMI_H_ */ diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index b89dec5865a5..a5b355384d4a 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -99,7 +99,7 @@ struct ipa; static inline u32 arbitration_lock_disable_encoded(enum ipa_version version, u32 mask) { - /* assert(version >= IPA_VERSION_4_0); */ + WARN_ON(version < IPA_VERSION_4_0); if (version < IPA_VERSION_4_9) return u32_encode_bits(mask, GENMASK(20, 17)); @@ -116,7 +116,7 @@ static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version, { u32 val = enable ? 1 : 0; - /* assert(version >= IPA_VERSION_4_5); */ + WARN_ON(version < IPA_VERSION_4_5); if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7) return u32_encode_bits(val, GENMASK(21, 21)); @@ -409,7 +409,7 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version, val = u32_encode_bits(size, HDR_LEN_FMASK); if (version < IPA_VERSION_4_5) { - /* ipa_assert(header_size == size); */ + WARN_ON(header_size != size); return val; } @@ -429,7 +429,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK); if (version < IPA_VERSION_4_5) { - /* ipa_assert(offset == off); */ + WARN_ON(offset != off); return val; } @@ -812,7 +812,7 @@ ipa_reg_irq_suspend_info_offset(enum ipa_version version) static inline u32 ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee) { - /* assert(version != IPA_VERSION_3_0); */ + WARN_ON(version == IPA_VERSION_3_0); if (version < IPA_VERSION_4_9) return 0x00003034 + 0x1000 * ee; @@ -830,7 +830,7 @@ ipa_reg_irq_suspend_en_offset(enum ipa_version version) static inline u32 ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee) { - /* assert(version != IPA_VERSION_3_0); */ + WARN_ON(version == IPA_VERSION_3_0); if (version < IPA_VERSION_4_9) return 0x00003038 + 0x1000 * ee; diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c index 3b2dc216d3a6..e3da95d69409 100644 --- a/drivers/net/ipa/ipa_resource.c +++ b/drivers/net/ipa/ipa_resource.c @@ -29,7 +29,6 @@ static bool ipa_resource_limits_valid(struct ipa *ipa, const struct ipa_resource_data *data) { -#ifdef IPA_VALIDATION u32 group_count; u32 i; u32 j; @@ -65,7 +64,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa, if (resource->limits[j].min || resource->limits[j].max) return false; } -#endif /* !IPA_VALIDATION */ + return true; } diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 93270e50b6b3..0d15438a79e2 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -156,11 +156,16 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) if (!smp2p->disabled) { int ret; + /* The clock needs to be active for setup */ + ipa_clock_get(smp2p->ipa); + ret = ipa_setup(smp2p->ipa); if (ret) dev_err(&smp2p->ipa->pdev->dev, "error %d from ipa_setup()\n", ret); smp2p->disabled = true; + + ipa_clock_put(smp2p->ipa); } mutex_unlock(&smp2p->mutex); diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index c617a9156f26..2324e1b93e37 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -120,8 +120,6 @@ */ #define IPA_ZERO_RULE_SIZE (2 * sizeof(__le32)) -#ifdef IPA_VALIDATE - /* Check things that can be validated at build time. 
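/* Each of the ipa_reg.h changes above converts a commented-out assert()
 * into a WARN_ON(): an impossible argument now triggers a runtime warning
 * with a backtrace instead of being silently ignored.  A minimal sketch
 * of the conversion; example_encode() and the version bound are invented
 * for illustration.
 */
#include <linux/bug.h>
#include <linux/bits.h>
#include <linux/bitfield.h>
#include <linux/types.h>

static u32 example_encode(unsigned int version, u32 mask)
{
	/* was: a comment of the form "assert(version >= 4)" */
	WARN_ON(version < 4);

	return u32_encode_bits(mask, GENMASK(20, 17));
}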
*/ static void ipa_table_validate_build(void) { @@ -161,7 +159,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route) else size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64); - if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed)) + if (!ipa_cmd_table_valid(ipa, mem, route)) return false; /* mem->size >= size is sufficient, but we'll demand more */ @@ -169,7 +167,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route) return true; /* Hashed table regions can be zero size if hashing is not supported */ - if (hashed && !mem->size) + if (ipa_table_hash_support(ipa) && !mem->size) return true; dev_err(dev, "%s table region %u size 0x%02x, expected 0x%02x\n", @@ -183,14 +181,22 @@ bool ipa_table_valid(struct ipa *ipa) { bool valid; - valid = ipa_table_valid_one(IPA_MEM_V4_FILTER, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_FILTER_HASHED, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER_HASHED, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE_HASHED, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE_HASHED, true); + valid = ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER, false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER, false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE, true); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE, true); + + if (!ipa_table_hash_support(ipa)) + return valid; + + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER_HASHED, + false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER_HASHED, + false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE_HASHED, + true); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE_HASHED, + true); return valid; } @@ -217,14 +223,6 @@ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_map) return true; } -#else /* !IPA_VALIDATE */ -static void ipa_table_validate_build(void) - -{ -} - -#endif /* !IPA_VALIDATE */ - /* Zero entry count means no table, so just return a 0 address */ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count) { @@ -233,7 +231,7 @@ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count) if (!count) return 0; -/* assert(count <= max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); */ + WARN_ON(count > max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); /* Skip over the zero rule and possibly the filter mask */ skip = filter_mask ? 
1 : 2; diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h index 1e2be9fce2f8..b6a9a0d79d68 100644 --- a/drivers/net/ipa/ipa_table.h +++ b/drivers/net/ipa/ipa_table.h @@ -16,8 +16,6 @@ struct ipa; /* The maximum number of route table entries (IPv4, IPv6; hashed or not) */ #define IPA_ROUTE_COUNT_MAX 15 -#ifdef IPA_VALIDATE - /** * ipa_table_valid() - Validate route and filter table memory regions * @ipa: IPA pointer @@ -35,20 +33,6 @@ bool ipa_table_valid(struct ipa *ipa); */ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask); -#else /* !IPA_VALIDATE */ - -static inline bool ipa_table_valid(struct ipa *ipa) -{ - return true; -} - -static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask) -{ - return true; -} - -#endif /* !IPA_VALIDATE */ - /** * ipa_table_hash_support() - Return true if hashed tables are supported * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index fd9219863234..f88ee02457d4 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -131,7 +131,7 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) if (shared->event == IPA_UC_EVENT_ERROR) dev_err(dev, "microcontroller error event\n"); else if (shared->event != IPA_UC_EVENT_LOG_INFO) - dev_err(dev, "unsupported microcontroller event %hhu\n", + dev_err(dev, "unsupported microcontroller event %u\n", shared->event); /* The LOG_INFO event can be safely ignored */ } @@ -140,55 +140,65 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); + struct device *dev = &ipa->pdev->dev; /* An INIT_COMPLETED response message is sent to the AP by the * microcontroller when it is operational. Other than this, the AP * should only receive responses from the microcontroller when it has * sent it a request message. * - * We can drop the clock reference taken in ipa_uc_setup() once we + * We can drop the clock reference taken in ipa_uc_clock() once we * know the microcontroller has finished its initialization. */ switch (shared->response) { case IPA_UC_RESPONSE_INIT_COMPLETED: - ipa->uc_loaded = true; - ipa_clock_put(ipa); + if (ipa->uc_clocked) { + ipa->uc_loaded = true; + ipa_clock_put(ipa); + ipa->uc_clocked = false; + } else { + dev_warn(dev, "unexpected init_completed response\n"); + } break; default: - dev_warn(&ipa->pdev->dev, - "unsupported microcontroller response %hhu\n", + dev_warn(dev, "unsupported microcontroller response %u\n", shared->response); break; } } -/* ipa_uc_setup() - Set up the microcontroller */ -void ipa_uc_setup(struct ipa *ipa) +/* Configure the IPA microcontroller subsystem */ +void ipa_uc_config(struct ipa *ipa) { - /* The microcontroller needs the IPA clock running until it has - * completed its initialization. It signals this by sending an - * INIT_COMPLETED response message to the AP. This could occur after - * we have finished doing the rest of the IPA initialization, so we - * need to take an extra "proxy" reference, and hold it until we've - * received that signal. (This reference is dropped in - * ipa_uc_response_hdlr(), above.) 
- */ - ipa_clock_get(ipa); - + ipa->uc_clocked = false; ipa->uc_loaded = false; ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_0, ipa_uc_event_handler); ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_1, ipa_uc_response_hdlr); } -/* Inverse of ipa_uc_setup() */ -void ipa_uc_teardown(struct ipa *ipa) +/* Inverse of ipa_uc_config() */ +void ipa_uc_deconfig(struct ipa *ipa) { ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); - if (!ipa->uc_loaded) + if (ipa->uc_clocked) ipa_clock_put(ipa); } +/* Take a proxy clock reference for the microcontroller */ +void ipa_uc_clock(struct ipa *ipa) +{ + static bool already; + + if (already) + return; + already = true; /* Only do this on first boot */ + + /* This clock reference dropped in ipa_uc_response_hdlr() above */ + ipa_clock_get(ipa); + ipa->uc_clocked = true; +} + /* Send a command to the microcontroller */ static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param) { diff --git a/drivers/net/ipa/ipa_uc.h b/drivers/net/ipa/ipa_uc.h index e8510899a3f0..14e4e1115aa7 100644 --- a/drivers/net/ipa/ipa_uc.h +++ b/drivers/net/ipa/ipa_uc.h @@ -9,16 +9,30 @@ struct ipa; /** - * ipa_uc_setup() - set up the IPA microcontroller subsystem + * ipa_uc_config() - Configure the IPA microcontroller subsystem * @ipa: IPA pointer */ -void ipa_uc_setup(struct ipa *ipa); +void ipa_uc_config(struct ipa *ipa); /** - * ipa_uc_teardown() - inverse of ipa_uc_setup() + * ipa_uc_deconfig() - Inverse of ipa_uc_config() * @ipa: IPA pointer */ -void ipa_uc_teardown(struct ipa *ipa); +void ipa_uc_deconfig(struct ipa *ipa); + +/** + * ipa_uc_clock() - Take a proxy clock reference for the microcontroller + * @ipa: IPA pointer + * + * The first time the modem boots, it loads firmware for and starts the + * IPA-resident microcontroller. The microcontroller signals that it + * has completed its initialization by sending an INIT_COMPLETED response + * message to the AP. The AP must ensure the IPA core clock is operating + * until it receives this message, and to do so we take a "proxy" clock + * reference on its behalf here. Once we receive the INIT_COMPLETED + * message (in ipa_uc_response_hdlr()) we drop this clock reference. 
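/* The proxy reference taken in ipa_uc_clock() above is deliberately
 * one-shot: only the first modem boot loads microcontroller firmware,
 * so only that boot needs the clock held on the microcontroller's
 * behalf.  A standalone sketch of the pattern, with hypothetical
 * uc_example_* names and an atomic counter in place of the real clock
 * reference:
 */
#include <linux/atomic.h>
#include <linux/types.h>

struct uc_example {
	atomic_t clk_ref;	/* stand-in for ipa_clock_get()/put() */
	bool clocked;		/* proxy reference currently held? */
};

static void uc_example_clock(struct uc_example *uc)
{
	static bool already;

	if (already)
		return;
	already = true;			/* first boot only */

	atomic_inc(&uc->clk_ref);	/* "get" the clock */
	uc->clocked = true;
}

static void uc_example_init_completed(struct uc_example *uc)
{
	if (!uc->clocked)
		return;			/* unexpected; warn in real code */

	atomic_dec(&uc->clk_ref);	/* "put" the clock */
	uc->clocked = false;
}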
+ */ +void ipa_uc_clock(struct ipa *ipa); /** * ipa_uc_panic_notifier() diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a707502a0c0f..c0b21a5580d5 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -732,6 +732,7 @@ static int ipvlan_device_event(struct notifier_block *unused, port = ipvlan_port_get_rtnl(dev); switch (event) { + case NETDEV_UP: case NETDEV_CHANGE: list_for_each_entry(ipvlan, &port->ipvlans, pnode) netif_stacked_transfer_operstate(ipvlan->phy_dev, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 80de9768ecd4..35f46ad040b0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -829,7 +829,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int macvlan_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = macvlan_dev_real_dev(dev); const struct net_device_ops *ops = real_dev->netdev_ops; @@ -845,8 +845,8 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; fallthrough; case SIOCGHWTSTAMP: - if (netif_device_present(real_dev) && ops->ndo_do_ioctl) - err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) + err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); break; } @@ -1151,7 +1151,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_stop = macvlan_stop, .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, - .ndo_do_ioctl = macvlan_do_ioctl, + .ndo_eth_ioctl = macvlan_eth_ioctl, .ndo_fix_features = macvlan_fix_features, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig new file mode 100644 index 000000000000..d8f966cedc89 --- /dev/null +++ b/drivers/net/mctp/Kconfig @@ -0,0 +1,8 @@ + +if MCTP + +menu "MCTP Device Drivers" + +endmenu + +endif diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/drivers/net/mctp/Makefile diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile deleted file mode 100644 index f71b9f8f3c4f..000000000000 --- a/drivers/net/mhi/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_MHI_NET) += mhi_net.o - -mhi_net-y := net.o proto_mbim.o diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h deleted file mode 100644 index 1d0c499d27a3..000000000000 --- a/drivers/net/mhi/mhi.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - */ - -struct mhi_net_stats { - u64_stats_t rx_packets; - u64_stats_t rx_bytes; - u64_stats_t rx_errors; - u64_stats_t rx_dropped; - u64_stats_t rx_length_errors; - u64_stats_t tx_packets; - u64_stats_t tx_bytes; - u64_stats_t tx_errors; - u64_stats_t tx_dropped; - struct u64_stats_sync tx_syncp; - struct u64_stats_sync rx_syncp; -}; - -struct mhi_net_dev { - struct mhi_device *mdev; - struct net_device *ndev; - struct sk_buff *skbagg_head; - struct sk_buff *skbagg_tail; - const struct mhi_net_proto *proto; - void *proto_data; - struct delayed_work rx_refill; - struct mhi_net_stats stats; - u32 rx_queue_sz; - int msg_enable; - unsigned int mru; -}; - -struct mhi_net_proto { - int (*init)(struct mhi_net_dev 
*mhi_netdev); - struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); - void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); -}; - -extern const struct mhi_net_proto proto_mbim; diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c deleted file mode 100644 index bf1ad863237d..000000000000 --- a/drivers/net/mhi/proto_mbim.c +++ /dev/null @@ -1,304 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - * - * This driver copy some code from cdc_ncm, which is: - * Copyright (C) ST-Ericsson 2010-2012 - * and cdc_mbim, which is: - * Copyright (c) 2012 Smith Micro Software, Inc. - * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> - * - */ - -#include <linux/ethtool.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/mii.h> -#include <linux/netdevice.h> -#include <linux/wwan.h> -#include <linux/skbuff.h> -#include <linux/usb.h> -#include <linux/usb/cdc.h> -#include <linux/usb/usbnet.h> -#include <linux/usb/cdc_ncm.h> - -#include "mhi.h" - -#define MBIM_NDP16_SIGN_MASK 0x00ffffff - -/* Usual WWAN MTU */ -#define MHI_MBIM_DEFAULT_MTU 1500 - -/* 3500 allows to optimize skb allocation, the skbs will basically fit in - * one 4K page. Large MBIM packets will simply be split over several MHI - * transfers and chained by the MHI net layer (zerocopy). - */ -#define MHI_MBIM_DEFAULT_MRU 3500 - -struct mbim_context { - u16 rx_seq; - u16 tx_seq; -}; - -static void __mbim_length_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_length_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static void __mbim_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static int mbim_rx_verify_nth16(struct sk_buff *skb) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - struct mbim_context *ctx = dev->proto_data; - struct usb_cdc_ncm_nth16 *nth16; - int len; - - if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + - sizeof(struct usb_cdc_ncm_ndp16)) { - netif_dbg(dev, rx_err, dev->ndev, "frame too short\n"); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; - - if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { - netif_dbg(dev, rx_err, dev->ndev, - "invalid NTH16 signature <%#010x>\n", - le32_to_cpu(nth16->dwSignature)); - __mbim_errors_inc(dev); - return -EINVAL; - } - - /* No limit on the block length, except the size of the data pkt */ - len = le16_to_cpu(nth16->wBlockLength); - if (len > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "NTB does not fit into the skb %u/%u\n", len, - skb->len); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && - (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && - !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - netif_dbg(dev, rx_err, dev->ndev, - "sequence number glitch prev=%d curr=%d\n", - ctx->rx_seq, le16_to_cpu(nth16->wSequence)); - } - ctx->rx_seq = le16_to_cpu(nth16->wSequence); - - return le16_to_cpu(nth16->wNdpIndex); -} - -static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - int ret; - - if (le16_to_cpu(ndp16->wLength) < 
USB_CDC_NCM_NDP16_LENGTH_MIN) { - netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n", - le16_to_cpu(ndp16->wLength)); - return -EINVAL; - } - - ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) - / sizeof(struct usb_cdc_ncm_dpe16)); - ret--; /* Last entry is always a NULL terminator */ - - if (sizeof(struct usb_cdc_ncm_ndp16) + - ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "Invalid nframes = %d\n", ret); - return -EINVAL; - } - - return ret; -} - -static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) -{ - struct net_device *ndev = mhi_netdev->ndev; - int ndpoffset; - - /* Check NTB header and retrieve first NDP offset */ - ndpoffset = mbim_rx_verify_nth16(skb); - if (ndpoffset < 0) { - net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name); - goto error; - } - - /* Process each NDP */ - while (1) { - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16; - int nframes, n, dpeoffset; - - if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { - net_err_ratelimited("%s: Incorrect NDP offset (%u)\n", - ndev->name, ndpoffset); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Check NDP header and retrieve number of datagrams */ - nframes = mbim_rx_verify_ndp16(skb, &ndp16); - if (nframes < 0) { - net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Only IP data type supported, no DSS in MHI context */ - if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) - != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { - net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* Only primary IP session 0 (0x00) supported for now */ - if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { - net_err_ratelimited("%s: bad packet session\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* de-aggregate and deliver IP packets */ - dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); - for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { - u16 dgram_offset, dgram_len; - struct sk_buff *skbn; - - if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) - break; - - dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); - dgram_len = le16_to_cpu(dpe16.wDatagramLength); - - if (!dgram_offset || !dgram_len) - break; /* null terminator */ - - skbn = netdev_alloc_skb(ndev, dgram_len); - if (!skbn) - continue; - - skb_put(skbn, dgram_len); - skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); - - switch (skbn->data[0] & 0xf0) { - case 0x40: - skbn->protocol = htons(ETH_P_IP); - break; - case 0x60: - skbn->protocol = htons(ETH_P_IPV6); - break; - default: - net_err_ratelimited("%s: unknown protocol\n", - ndev->name); - __mbim_errors_inc(mhi_netdev); - dev_kfree_skb_any(skbn); - continue; - } - - netif_rx(skbn); - } -next_ndp: - /* Other NDP to process? 
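/* Raw-IP links like this one carry no Ethernet header, so both the MBIM
 * de-aggregation path above and the plain mhi_net receive path recover
 * the protocol from the IP version nibble of the first byte.  A
 * self-contained sketch; the example_* name is invented.
 */
#include <linux/if_ether.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static __be16 example_ip_version_to_proto(const u8 *data)
{
	switch (data[0] & 0xf0) {
	case 0x40:			/* IPv4 */
		return htons(ETH_P_IP);
	case 0x60:			/* IPv6 */
		return htons(ETH_P_IPV6);
	default:
		return 0;		/* unknown; caller drops the packet */
	}
}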
*/ - ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); - if (!ndpoffset) - break; - } - - /* free skb */ - dev_consume_skb_any(skb); - return; -error: - dev_kfree_skb_any(skb); -} - -struct mbim_tx_hdr { - struct usb_cdc_ncm_nth16 nth16; - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16[2]; -} __packed; - -static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev, - struct sk_buff *skb) -{ - struct mbim_context *ctx = mhi_netdev->proto_data; - unsigned int dgram_size = skb->len; - struct usb_cdc_ncm_nth16 *nth16; - struct usb_cdc_ncm_ndp16 *ndp16; - struct mbim_tx_hdr *mbim_hdr; - - /* For now, this is a partial implementation of CDC MBIM, only one NDP - * is sent, containing the IP packet (no aggregation). - */ - - /* Ensure we have enough headroom for crafting MBIM header */ - if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { - dev_kfree_skb_any(skb); - return NULL; - } - - mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); - - /* Fill NTB header */ - nth16 = &mbim_hdr->nth16; - nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); - nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - nth16->wSequence = cpu_to_le16(ctx->tx_seq++); - nth16->wBlockLength = cpu_to_le16(skb->len); - nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - - /* Fill the unique NDP */ - ndp16 = &mbim_hdr->ndp16; - ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); - ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) - + sizeof(struct usb_cdc_ncm_dpe16) * 2); - ndp16->wNextNdpIndex = 0; - - /* Datagram follows the mbim header */ - ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); - ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); - - /* null termination */ - ndp16->dpe16[1].wDatagramIndex = 0; - ndp16->dpe16[1].wDatagramLength = 0; - - return skb; -} - -static int mbim_init(struct mhi_net_dev *mhi_netdev) -{ - struct net_device *ndev = mhi_netdev->ndev; - - mhi_netdev->proto_data = devm_kzalloc(&ndev->dev, - sizeof(struct mbim_context), - GFP_KERNEL); - if (!mhi_netdev->proto_data) - return -ENOMEM; - - ndev->needed_headroom = sizeof(struct mbim_tx_hdr); - ndev->mtu = MHI_MBIM_DEFAULT_MTU; - mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; - - return 0; -} - -const struct mhi_net_proto proto_mbim = { - .init = mbim_init, - .rx = mbim_rx, - .tx_fixup = mbim_tx_fixup, -}; diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi_net.c index 11be6bcdd551..975f7f9bdf4c 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi_net.c @@ -11,28 +11,42 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> -#include <linux/wwan.h> - -#include "mhi.h" #define MHI_NET_MIN_MTU ETH_MIN_MTU #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000 -/* When set to false, the default netdev (link 0) is not created, and it's up - * to user to create the link (via wwan rtnetlink). 
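/* The single-NDP TX header built in mbim_tx_fixup() above is fixed size,
 * so every offset written into it is a compile-time constant.  A sketch
 * of the layout arithmetic, mirroring the deleted struct; the example_*
 * name is invented and the sizes follow the packed UAPI structs.
 */
#include <linux/compiler.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <linux/usb/cdc.h>

struct example_mbim_tx_hdr {
	struct usb_cdc_ncm_nth16 nth16;
	struct usb_cdc_ncm_ndp16 ndp16;
	struct usb_cdc_ncm_dpe16 dpe16[2];	/* datagram + terminator */
} __packed;

/* wNdpIndex: the NDP starts right after the 12-byte NTH16 header */
static_assert(offsetof(struct example_mbim_tx_hdr, ndp16) ==
	      sizeof(struct usb_cdc_ncm_nth16));

/* dpe16[0].wDatagramIndex: the IP datagram follows the whole header */
static_assert(sizeof(struct example_mbim_tx_hdr) == 28);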
- */ -static bool create_default_iface = true; -module_param(create_default_iface, bool, 0); +struct mhi_net_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; +}; + +struct mhi_net_dev { + struct mhi_device *mdev; + struct net_device *ndev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + struct delayed_work rx_refill; + struct mhi_net_stats stats; + u32 rx_queue_sz; + int msg_enable; + unsigned int mru; +}; struct mhi_device_info { const char *netname; - const struct mhi_net_proto *proto; }; static int mhi_ndo_open(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); /* Feed the rx buffer pool */ schedule_delayed_work(&mhi_netdev->rx_refill, 0); @@ -47,7 +61,7 @@ static int mhi_ndo_open(struct net_device *ndev) static int mhi_ndo_stop(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); netif_stop_queue(ndev); netif_carrier_off(ndev); @@ -58,17 +72,10 @@ static int mhi_ndo_stop(struct net_device *ndev) static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - const struct mhi_net_proto *proto = mhi_netdev->proto; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); struct mhi_device *mdev = mhi_netdev->mdev; int err; - if (proto && proto->tx_fixup) { - skb = proto->tx_fixup(mhi_netdev, skb); - if (unlikely(!skb)) - goto exit_drop; - } - err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); if (unlikely(err)) { net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", @@ -93,7 +100,7 @@ exit_drop: static void mhi_ndo_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); unsigned int start; do { @@ -101,8 +108,6 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); - stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped); - stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors); } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); do { @@ -165,7 +170,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, struct mhi_result *mhi_res) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - const struct mhi_net_proto *proto = mhi_netdev->proto; struct sk_buff *skb = mhi_res->buf_addr; int free_desc_count; @@ -205,11 +209,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, mhi_netdev->skbagg_head = NULL; } - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - switch (skb->data[0] & 0xf0) { case 0x40: skb->protocol = htons(ETH_P_IP); @@ -222,10 +221,11 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - if (proto && proto->rx) - proto->rx(mhi_netdev, skb); - else - netif_rx(skb); + 
u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + netif_rx(skb); } /* Refill if RX buffers queue becomes low */ @@ -248,7 +248,6 @@ static void mhi_net_ul_callback(struct mhi_device *mhi_dev, u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); if (unlikely(mhi_res->transaction_status)) { - /* MHI layer stopping/resetting the UL channel */ if (mhi_res->transaction_status == -ENOTCONN) { u64_stats_update_end(&mhi_netdev->stats.tx_syncp); @@ -302,33 +301,18 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } -static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, - struct netlink_ext_ack *extack) +static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev) { - const struct mhi_device_info *info; - struct mhi_device *mhi_dev = ctxt; struct mhi_net_dev *mhi_netdev; int err; - info = (struct mhi_device_info *)mhi_dev->id->driver_data; - - /* For now we only support one link (link context 0), driver must be - * reworked to break 1:1 relationship for net MBIM and to forward setup - * call to rmnet(QMAP) otherwise. - */ - if (if_id != 0) - return -EINVAL; - - if (dev_get_drvdata(&mhi_dev->dev)) - return -EBUSY; - - mhi_netdev = wwan_netdev_drvpriv(ndev); + mhi_netdev = netdev_priv(ndev); dev_set_drvdata(&mhi_dev->dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; - mhi_netdev->proto = info->proto; + mhi_netdev->mru = mhi_dev->mhi_cntrl->mru; INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); u64_stats_init(&mhi_netdev->stats.rx_syncp); @@ -342,38 +326,22 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, /* Number of transfer descriptors determines size of the queue */ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); - if (extack) - err = register_netdevice(ndev); - else - err = register_netdev(ndev); + err = register_netdev(ndev); if (err) - goto out_err; - - if (mhi_netdev->proto) { - err = mhi_netdev->proto->init(mhi_netdev); - if (err) - goto out_err_proto; - } + return err; return 0; -out_err_proto: - unregister_netdevice(ndev); out_err: free_netdev(ndev); return err; } -static void mhi_net_dellink(void *ctxt, struct net_device *ndev, - struct list_head *head) +static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - struct mhi_device *mhi_dev = ctxt; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); - if (head) - unregister_netdevice_queue(ndev, head); - else - unregister_netdev(ndev); + unregister_netdev(ndev); mhi_unprepare_from_transfer(mhi_dev); @@ -382,65 +350,34 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev, dev_set_drvdata(&mhi_dev->dev, NULL); } -static const struct wwan_ops mhi_wwan_ops = { - .priv_size = sizeof(struct mhi_net_dev), - .setup = mhi_net_setup, - .newlink = mhi_net_newlink, - .dellink = mhi_net_dellink, -}; - static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; struct net_device *ndev; int err; - err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev, - WWAN_NO_DEFAULT_LINK); - if (err) - 
return err; - - if (!create_default_iface) - return 0; - - /* Create a default interface which is used as either RMNET real-dev, - * MBIM link 0 or ip link 0) - */ ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname, NET_NAME_PREDICTABLE, mhi_net_setup); - if (!ndev) { - err = -ENOMEM; - goto err_unregister; - } + if (!ndev) + return -ENOMEM; SET_NETDEV_DEV(ndev, &mhi_dev->dev); - err = mhi_net_newlink(mhi_dev, ndev, 0, NULL); - if (err) - goto err_release; + err = mhi_net_newlink(mhi_dev, ndev); + if (err) { + free_netdev(ndev); + return err; + } return 0; - -err_release: - free_netdev(ndev); -err_unregister: - wwan_unregister_ops(&cntrl->mhi_dev->dev); - - return err; } static void mhi_net_remove(struct mhi_device *mhi_dev) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; - - /* WWAN core takes care of removing remaining links */ - wwan_unregister_ops(&cntrl->mhi_dev->dev); - if (create_default_iface) - mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL); + mhi_net_dellink(mhi_dev, mhi_netdev->ndev); } static const struct mhi_device_info mhi_hwip0 = { @@ -451,18 +388,11 @@ static const struct mhi_device_info mhi_swip0 = { .netname = "mhi_swip%d", }; -static const struct mhi_device_info mhi_hwip0_mbim = { - .netname = "mhi_mbim%d", - .proto = &proto_mbim, -}; - static const struct mhi_device_id mhi_net_id_table[] = { /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */ { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 }, /* Software data PATH (to modem CPU) */ { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 }, - /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ - { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim }, {} }; MODULE_DEVICE_TABLE(mhi, mhi_net_id_table); diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ccec29970d5b..a29ec264119d 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -195,12 +193,10 @@ new_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? 
ret : count; } @@ -212,8 +208,6 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -224,12 +218,10 @@ del_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -262,29 +254,31 @@ static struct device_type nsim_bus_dev_type = { }; static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count); +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues); static ssize_t new_device_store(struct bus_type *bus, const char *buf, size_t count) { + unsigned int id, port_count, num_queues; struct nsim_bus_dev *nsim_bus_dev; - unsigned int port_count; - unsigned int id; int err; - err = sscanf(buf, "%u %u", &id, &port_count); + err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues); switch (err) { case 1: port_count = 1; fallthrough; case 2: + num_queues = 1; + fallthrough; + case 3: if (id > INT_MAX) { pr_err("Value of \"id\" is too big.\n"); return -EINVAL; } break; default: - pr_err("Format for adding new device is \"id port_count\" (uint uint).\n"); + pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint uint).\n"); return -EINVAL; } @@ -295,7 +289,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count) goto err; } - nsim_bus_dev = nsim_bus_dev_new(id, port_count); + nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues); if (IS_ERR(nsim_bus_dev)) { err = PTR_ERR(nsim_bus_dev); goto err; @@ -397,7 +391,7 @@ static struct bus_type nsim_bus = { #define NSIM_BUS_DEV_MAX_VFS 4 static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count) +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues) { struct nsim_bus_dev *nsim_bus_dev; int err; @@ -413,6 +407,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->num_queues = num_queues; nsim_bus_dev->initial_net = current->nsproxy->net_ns; nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 6348307bfa84..ff5714209b86 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -864,16 +864,23 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EOPNOTSUPP; if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it.
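/* Replacing mutex_lock() with mutex_trylock() in the handlers above
 * turns a racy sysfs-write vs. devlink-reload interaction into a clean
 * -EBUSY for whichever path loses the race, instead of risking a
 * deadlock.  Minimal sketch of the pattern; the example_* names are
 * invented.
 */
#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_reload_lock);

static int example_port_add(void)
{
	if (!mutex_trylock(&example_reload_lock))
		return -EBUSY;	/* reload in flight; caller may retry */

	/* ... add the port under the lock ... */

	mutex_unlock(&example_reload_lock);
	return 0;
}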
*/ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EOPNOTSUPP; } nsim_dev_reload_destroy(nsim_dev); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return 0; } @@ -1431,10 +1438,10 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) struct devlink *devlink; int err; - devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); + devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), + nsim_bus_dev->initial_net); if (!devlink) return -ENOMEM; - devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 213d3e5056c8..4300261e2f9e 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv) static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { - enum nsim_resource_id res_ids[] = { + static const enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_NEXTHOPS, diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c3aeb15843e2..50572e0f1f52 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -347,7 +347,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) struct netdevsim *ns; int err; - dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup); + dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup, + nsim_dev->nsim_bus_dev->num_queues); if (!dev) return ERR_PTR(-ENOMEM); @@ -392,7 +393,8 @@ void nsim_destroy(struct netdevsim *ns) static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device"); + NL_SET_ERR_MSG_MOD(extack, + "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device"); return -EOPNOTSUPP; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index ae462957dcee..1c20bcbd9d91 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -352,6 +352,7 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + unsigned int num_queues; /* Number of queues for each port on this bus */ struct net *initial_net; /* Purpose of this is to carry net pointer * during the probe time only. */ diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c56f703ae998..7564ae0c1997 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -207,6 +207,12 @@ config MARVELL_88X2222_PHY Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet Transceiver. +config MAXLINEAR_GPHY + tristate "Maxlinear Ethernet PHYs" + help + Support for the Maxlinear GPY115, GPY211, GPY212, GPY215, + GPY241, GPY245 PHYs. 
+ config MEDIATEK_GE_PHY tristate "MediaTek Gigabit Ethernet PHYs" help diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 172bb193ae6a..b2728d00fc9a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_MARVELL_10G_PHY) += marvell10g.o obj-$(CONFIG_MARVELL_PHY) += marvell.o obj-$(CONFIG_MARVELL_88X2222_PHY) += marvell-88x2222.o +obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o obj-$(CONFIG_MEDIATEK_GE_PHY) += mediatek-ge.o obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5d62b85a4024..bdac087058b2 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -532,12 +532,6 @@ static int at8031_register_regulators(struct phy_device *phydev) return 0; } -static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id) -{ - return (phydev->phy_id & phydev->drv->phy_id_mask) - == (phy_id & phydev->drv->phy_id_mask); -} - static int at803x_parse_dt(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -602,8 +596,8 @@ static int at803x_parse_dt(struct phy_device *phydev) * to the AR8030 so there might be a good chance it works on * the AR8030 too. */ - if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) || - at803x_match_phy_id(phydev, ATH8035_PHY_ID)) { + if (phydev->drv->phy_id == ATH8030_PHY_ID || + phydev->drv->phy_id == ATH8035_PHY_ID) { priv->clk_25m_reg &= AT8035_CLK_OUT_MASK; priv->clk_25m_mask &= AT8035_CLK_OUT_MASK; } @@ -631,7 +625,7 @@ static int at803x_parse_dt(struct phy_device *phydev) /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping * options. */ - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { if (of_property_read_bool(node, "qca,keep-pll-enabled")) priv->flags |= AT803X_KEEP_PLL_ENABLED; @@ -676,7 +670,7 @@ static int at803x_probe(struct phy_device *phydev) * Switch to the copper page, as otherwise we read * the PHY capabilities from the fiber side. 
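/* The at803x change above relies on each phy_driver entry now matching
 * one exact PHY ID, so the masked comparison done by the deleted
 * at803x_match_phy_id() collapses to comparing the bound driver's ID
 * directly.  Side-by-side sketch of the two forms; the example_* names
 * are invented.
 */
#include <linux/phy.h>

static bool example_masked_match(struct phy_device *phydev, u32 phy_id)
{
	/* old form: mask both IDs with the driver's phy_id_mask */
	return (phydev->phy_id & phydev->drv->phy_id_mask) ==
	       (phy_id & phydev->drv->phy_id_mask);
}

static bool example_exact_match(struct phy_device *phydev, u32 phy_id)
{
	/* new form: the bound driver already identifies the exact device */
	return phydev->drv->phy_id == phy_id;
}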
*/ - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { phy_lock_mdio_bus(phydev); ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); phy_unlock_mdio_bus(phydev); @@ -709,7 +703,7 @@ static int at803x_get_features(struct phy_device *phydev) if (err) return err; - if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID)) + if (phydev->drv->phy_id != ATH8031_PHY_ID) return 0; /* AR8031/AR8033 have different status registers @@ -820,7 +814,7 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { ret = at8031_pll_config(phydev); if (ret < 0) return ret; diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index f7a2ec150e54..211b5476a6f5 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -326,11 +326,9 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) static int dp8382x_disable_wol(struct phy_device *phydev) { - int value = DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | - DP83822_WOL_SECURE_ON; - - return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_WOL_CFG, value); + return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, + DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | + DP83822_WOL_SECURE_ON); } static int dp83822_read_status(struct phy_device *phydev) diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index d453ec016168..3c032868ef04 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -8,11 +8,16 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/of.h> +#include <linux/bitfield.h> +#define XWAY_MDIO_MIICTRL 0x17 /* mii control */ #define XWAY_MDIO_IMASK 0x19 /* interrupt mask */ #define XWAY_MDIO_ISTAT 0x1A /* interrupt status */ #define XWAY_MDIO_LED 0x1B /* led control */ +#define XWAY_MDIO_MIICTRL_RXSKEW_MASK GENMASK(14, 12) +#define XWAY_MDIO_MIICTRL_TXSKEW_MASK GENMASK(10, 8) + /* bit 15:12 are reserved */ #define XWAY_MDIO_LED_LED3_EN BIT(11) /* Enable the integrated function of LED3 */ #define XWAY_MDIO_LED_LED2_EN BIT(10) /* Enable the integrated function of LED2 */ @@ -157,6 +162,73 @@ #define PHY_ID_PHY11G_VR9_1_2 0xD565A409 #define PHY_ID_PHY22F_VR9_1_2 0xD565A419 +static const int xway_internal_delay[] = {0, 500, 1000, 1500, 2000, 2500, + 3000, 3500}; + +static int xway_gphy_rgmii_init(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + unsigned int delay_size = ARRAY_SIZE(xway_internal_delay); + s32 int_delay; + int val = 0; + + if (!phy_interface_is_rgmii(phydev)) + return 0; + + /* Existing behavior was to use default pin strapping delay in rgmii + * mode, but rgmii should have meant no delay. Warn existing users, + * but do not change anything at the moment. + */ + if (phydev->interface == PHY_INTERFACE_MODE_RGMII) { + u16 txskew, rxskew; + + val = phy_read(phydev, XWAY_MDIO_MIICTRL); + if (val < 0) + return val; + + txskew = FIELD_GET(XWAY_MDIO_MIICTRL_TXSKEW_MASK, val); + rxskew = FIELD_GET(XWAY_MDIO_MIICTRL_RXSKEW_MASK, val); + + if (txskew > 0 || rxskew > 0) + phydev_warn(phydev, + "PHY has delays (e.g. 
via pin strapping), but phy-mode = 'rgmii'\n" + "Should be 'rgmii-id' to use internal delays txskew:%d ps rxskew:%d ps\n", + xway_internal_delay[txskew], + xway_internal_delay[rxskew]); + return 0; + } + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + int_delay = phy_get_internal_delay(phydev, dev, + xway_internal_delay, + delay_size, true); + + /* if rx-internal-delay-ps is missing, use default of 2.0 ns */ + if (int_delay < 0) + int_delay = 4; /* 2000 ps */ + + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_RXSKEW_MASK, int_delay); + } + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + int_delay = phy_get_internal_delay(phydev, dev, + xway_internal_delay, + delay_size, false); + + /* if tx-internal-delay-ps is missing, use default of 2.0 ns */ + if (int_delay < 0) + int_delay = 4; /* 2000 ps */ + + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_TXSKEW_MASK, int_delay); + } + + return phy_modify(phydev, XWAY_MDIO_MIICTRL, + XWAY_MDIO_MIICTRL_RXSKEW_MASK | + XWAY_MDIO_MIICTRL_TXSKEW_MASK, val); +} + static int xway_gphy_config_init(struct phy_device *phydev) { int err; @@ -204,6 +276,10 @@ static int xway_gphy_config_init(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2H, ledxh); phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2L, ledxl); + err = xway_gphy_rgmii_init(phydev); + if (err) + return err; + return 0; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 53a433442803..0b7cae118ad7 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -28,6 +28,7 @@ #include <linux/marvell_phy.h> #include <linux/phy.h> #include <linux/sfp.h> +#include <linux/netdevice.h> #define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) @@ -104,6 +105,16 @@ enum { MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN = 0x5, MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH = 0x6, MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII = 0x7, + MV_V2_PORT_INTR_STS = 0xf040, + MV_V2_PORT_INTR_MASK = 0xf043, + MV_V2_PORT_INTR_STS_WOL_EN = BIT(8), + MV_V2_MAGIC_PKT_WORD0 = 0xf06b, + MV_V2_MAGIC_PKT_WORD1 = 0xf06c, + MV_V2_MAGIC_PKT_WORD2 = 0xf06d, + /* Wake on LAN registers */ + MV_V2_WOL_CTRL = 0xf06e, + MV_V2_WOL_CTRL_CLEAR_STS = BIT(15), + MV_V2_WOL_CTRL_MAGIC_PKT_EN = BIT(0), /* Temperature control/read registers (88X3310 only) */ MV_V2_TEMP_CTRL = 0xf08a, MV_V2_TEMP_CTRL_MASK = 0xc000, @@ -1020,6 +1031,80 @@ static int mv2111_match_phy_device(struct phy_device *phydev) return mv211x_match_phy_device(phydev, false); } +static void mv3110_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL); + if (ret < 0) + return; + + if (ret & MV_V2_WOL_CTRL_MAGIC_PKT_EN) + wol->wolopts |= WAKE_MAGIC; +} + +static int mv3110_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* Enable the WOL interrupt */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_PORT_INTR_MASK, + MV_V2_PORT_INTR_STS_WOL_EN); + if (ret < 0) + return ret; + + /* Store the device address for the magic packet */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD2, + ((phydev->attached_dev->dev_addr[5] << 8) | + phydev->attached_dev->dev_addr[4])); + if (ret < 0) + return ret; + + ret = 
phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD1, + ((phydev->attached_dev->dev_addr[3] << 8) | + phydev->attached_dev->dev_addr[2])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD0, + ((phydev->attached_dev->dev_addr[1] << 8) | + phydev->attached_dev->dev_addr[0])); + if (ret < 0) + return ret; + + /* Clear WOL status and enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN | + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching & reset WOL status bit */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN, + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } + + /* Reset the clear WOL status bit as it does not self-clear */ + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_CLEAR_STS); +} + static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1039,6 +1124,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1076,6 +1163,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88E2110, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 924ed5b034a4..edb951695b13 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk) { struct vsc8531_private *vsc8531 = phydev->priv; bool base = phydev->mdio.addr == vsc8531->ts_base_addr; - u8 msgs[] = { + static const u8 msgs[] = { PTP_MSGTYPE_SYNC, PTP_MSGTYPE_DELAY_REQ }; @@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk, bool one_step, bool enable) { - u8 msgs[] = { + static const u8 msgs[] = { PTP_MSGTYPE_SYNC, PTP_MSGTYPE_DELAY_REQ }; @@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev) static int __vsc8584_init_ptp(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; - u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; - u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; + static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; + static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; u32 val; if (!vsc8584_is_1588_input_clk_configured(phydev)) { diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c new file mode 100644 index 000000000000..2d5d5081c3b6 --- /dev/null +++ b/drivers/net/phy/mxl-gpy.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2021 Maxlinear Corporation + * Copyright (C) 2020 Intel Corporation + * + * Drivers for Maxlinear Ethernet GPY + * + */ + +#include <linux/module.h> +#include <linux/bitfield.h> +#include <linux/phy.h> +#include <linux/netdevice.h> + +/* PHY ID */ +#define PHY_ID_GPYx15B_MASK 0xFFFFFFFC +#define PHY_ID_GPY21xB_MASK 0xFFFFFFF9 +#define PHY_ID_GPY2xx 0x67C9DC00 +#define PHY_ID_GPY115B 0x67C9DF00 +#define PHY_ID_GPY115C 0x67C9DF10 +#define PHY_ID_GPY211B 0x67C9DE08 +#define PHY_ID_GPY211C 
0x67C9DE10 +#define PHY_ID_GPY212B 0x67C9DE09 +#define PHY_ID_GPY212C 0x67C9DE20 +#define PHY_ID_GPY215B 0x67C9DF04 +#define PHY_ID_GPY215C 0x67C9DF20 +#define PHY_ID_GPY241B 0x67C9DE40 +#define PHY_ID_GPY241BM 0x67C9DE80 +#define PHY_ID_GPY245B 0x67C9DEC0 + +#define PHY_MIISTAT 0x18 /* MII state */ +#define PHY_IMASK 0x19 /* interrupt mask */ +#define PHY_ISTAT 0x1A /* interrupt status */ +#define PHY_FWV 0x1E /* firmware version */ + +#define PHY_MIISTAT_SPD_MASK GENMASK(2, 0) +#define PHY_MIISTAT_DPX BIT(3) +#define PHY_MIISTAT_LS BIT(10) + +#define PHY_MIISTAT_SPD_10 0 +#define PHY_MIISTAT_SPD_100 1 +#define PHY_MIISTAT_SPD_1000 2 +#define PHY_MIISTAT_SPD_2500 4 + +#define PHY_IMASK_WOL BIT(15) /* Wake-on-LAN */ +#define PHY_IMASK_ANC BIT(10) /* Auto-Neg complete */ +#define PHY_IMASK_ADSC BIT(5) /* Link auto-downspeed detect */ +#define PHY_IMASK_DXMC BIT(2) /* Duplex mode change */ +#define PHY_IMASK_LSPC BIT(1) /* Link speed change */ +#define PHY_IMASK_LSTC BIT(0) /* Link state change */ +#define PHY_IMASK_MASK (PHY_IMASK_LSTC | \ + PHY_IMASK_LSPC | \ + PHY_IMASK_DXMC | \ + PHY_IMASK_ADSC | \ + PHY_IMASK_ANC) + +#define PHY_FWV_REL_MASK BIT(15) +#define PHY_FWV_TYPE_MASK GENMASK(11, 8) +#define PHY_FWV_MINOR_MASK GENMASK(7, 0) + +/* SGMII */ +#define VSPEC1_SGMII_CTRL 0x08 +#define VSPEC1_SGMII_CTRL_ANEN BIT(12) /* Aneg enable */ +#define VSPEC1_SGMII_CTRL_ANRS BIT(9) /* Restart Aneg */ +#define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \ + VSPEC1_SGMII_CTRL_ANRS) + +/* WoL */ +#define VPSPEC2_WOL_CTL 0x0E06 +#define VPSPEC2_WOL_AD01 0x0E08 +#define VPSPEC2_WOL_AD23 0x0E09 +#define VPSPEC2_WOL_AD45 0x0E0A +#define WOL_EN BIT(0) + +static const struct { + int type; + int minor; +} ver_need_sgmii_reaneg[] = { + {7, 0x6D}, + {8, 0x6D}, + {9, 0x73}, +}; + +static int gpy_config_init(struct phy_device *phydev) +{ + int ret; + + /* Mask all interrupts */ + ret = phy_write(phydev, PHY_IMASK, 0); + if (ret) + return ret; + + /* Clear all pending interrupts */ + ret = phy_read(phydev, PHY_ISTAT); + return ret < 0 ? ret : 0; +} + +static int gpy_probe(struct phy_device *phydev) +{ + int ret; + + if (!phydev->is_c45) { + ret = phy_get_c45_ids(phydev); + if (ret < 0) + return ret; + } + + /* Show GPY PHY FW version in dmesg */ + ret = phy_read(phydev, PHY_FWV); + if (ret < 0) + return ret; + + phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret, + (ret & PHY_FWV_REL_MASK) ? "release" : "test"); + + return 0; +} + +static bool gpy_sgmii_need_reaneg(struct phy_device *phydev) +{ + int fw_ver, fw_type, fw_minor; + size_t i; + + fw_ver = phy_read(phydev, PHY_FWV); + if (fw_ver < 0) + return true; + + fw_type = FIELD_GET(PHY_FWV_TYPE_MASK, fw_ver); + fw_minor = FIELD_GET(PHY_FWV_MINOR_MASK, fw_ver); + + for (i = 0; i < ARRAY_SIZE(ver_need_sgmii_reaneg); i++) { + if (fw_type != ver_need_sgmii_reaneg[i].type) + continue; + if (fw_minor < ver_need_sgmii_reaneg[i].minor) + return true; + break; + } + + return false; +} + +static bool gpy_2500basex_chk(struct phy_device *phydev) +{ + int ret; + + ret = phy_read(phydev, PHY_MIISTAT); + if (ret < 0) { + phydev_err(phydev, "Error: MDIO register access failed: %d\n", + ret); + return false; + } + + if (!(ret & PHY_MIISTAT_LS) || + FIELD_GET(PHY_MIISTAT_SPD_MASK, ret) != PHY_MIISTAT_SPD_2500) + return false; + + phydev->speed = SPEED_2500; + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANEN, 0); + return true; +} + +static bool gpy_sgmii_aneg_en(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL); + if (ret < 0) { + phydev_err(phydev, "Error: MMD register access failed: %d\n", + ret); + return true; + } + + return (ret & VSPEC1_SGMII_CTRL_ANEN) ? true : false; +} + +static int gpy_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + u32 adv; + int ret; + + if (phydev->autoneg == AUTONEG_DISABLE) { + /* Configure half duplex with genphy_setup_forced, + * because genphy_c45_pma_setup_forced does not support + * half duplex. + */ + return phydev->duplex != DUPLEX_FULL + ? genphy_setup_forced(phydev) + : genphy_c45_pma_setup_forced(phydev); + } + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); + ret = phy_modify_changed(phydev, MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + ret = genphy_c45_check_and_restart_aneg(phydev, changed); + if (ret < 0) + return ret; + + if (phydev->interface == PHY_INTERFACE_MODE_USXGMII || + phydev->interface == PHY_INTERFACE_MODE_INTERNAL) + return 0; + + /* No need to trigger re-ANEG if link speed is 2.5G or SGMII ANEG is + * disabled. + */ + if (!gpy_sgmii_need_reaneg(phydev) || gpy_2500basex_chk(phydev) || + !gpy_sgmii_aneg_en(phydev)) + return 0; + + /* There is a design constraint in the GPY2xx device where SGMII AN is + * only triggered when there is a change of speed. If the PHY link + * partner's speed is still the same even after the PHY TPI goes down + * and up again, SGMII AN is not triggered and hence there is no new + * in-band message from the GPY to the MAC-side SGMII. + * This could cause an issue during power up, when the PHY is up prior + * to the MAC. In this condition, once the MAC-side SGMII is up, it + * wouldn't receive a new in-band message from the GPY with the correct + * link status, speed and duplex info. + * + * 1) If the PHY is already up and TPI link status is still down (such + * as a hard reboot), TPI link status is polled for 4 seconds before + * retriggering SGMII AN. + * 2) If the PHY is already up and TPI link status is also up (such as + * a soft reboot), polling of TPI link status is not needed and SGMII + * AN is immediately retriggered. + * 3) In other conditions, such as the PHY being down or a speed + * change, skip retriggering SGMII AN.
Note: in case of speed change, GPY FW will + * initiate SGMII AN. + */ + + if (phydev->state != PHY_UP) + return 0; + + ret = phy_read_poll_timeout(phydev, MII_BMSR, ret, ret & BMSR_LSTATUS, + 20000, 4000000, false); + if (ret == -ETIMEDOUT) + return 0; + else if (ret < 0) + return ret; + + /* Trigger SGMII AN. */ + return phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANRS, VSPEC1_SGMII_CTRL_ANRS); +} + +static void gpy_update_interface(struct phy_device *phydev) +{ + int ret; + + /* Interface mode is fixed for USXGMII and integrated PHY */ + if (phydev->interface == PHY_INTERFACE_MODE_USXGMII || + phydev->interface == PHY_INTERFACE_MODE_INTERNAL) + return; + + /* Automatically switch SERDES interface between SGMII and 2500-BaseX + * according to speed. Disable ANEG in 2500-BaseX mode. + */ + switch (phydev->speed) { + case SPEED_2500: + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANEN, 0); + if (ret < 0) + phydev_err(phydev, + "Error: Disable of SGMII ANEG failed: %d\n", + ret); + break; + case SPEED_1000: + case SPEED_100: + case SPEED_10: + phydev->interface = PHY_INTERFACE_MODE_SGMII; + if (gpy_sgmii_aneg_en(phydev)) + break; + /* Enable and restart SGMII ANEG for 10/100/1000Mbps link speed + * if ANEG is disabled (in 2500-BaseX mode). + */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_ANEN_ANRS, + VSPEC1_SGMII_ANEN_ANRS); + if (ret < 0) + phydev_err(phydev, + "Error: Enable of SGMII ANEG failed: %d\n", + ret); + break; + } +} + +static int gpy_read_status(struct phy_device *phydev) +{ + int ret; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { + ret = genphy_c45_read_lpa(phydev); + if (ret < 0) + return ret; + + /* Read the link partner's 1G advertisement */ + ret = phy_read(phydev, MII_STAT1000); + if (ret < 0) + return ret; + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret); + } else if (phydev->autoneg == AUTONEG_DISABLE) { + linkmode_zero(phydev->lp_advertising); + } + + ret = phy_read(phydev, PHY_MIISTAT); + if (ret < 0) + return ret; + + phydev->link = (ret & PHY_MIISTAT_LS) ? 1 : 0; + phydev->duplex = (ret & PHY_MIISTAT_DPX) ? 
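The firmware-version handling above hinges on the PHY_FWV layout defined earlier: bit 15 flags a release build, bits 11:8 carry the firmware type, bits 7:0 the minor version, and gpy_sgmii_need_reaneg() compares type/minor against the ver_need_sgmii_reaneg table. A standalone sketch of that decode; the register value is made up:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t fwv = 0x8766;          /* made-up PHY_FWV register value */
    int rel   = (fwv >> 15) & 0x1;  /* PHY_FWV_REL_MASK */
    int type  = (fwv >> 8) & 0xf;   /* PHY_FWV_TYPE_MASK */
    int minor = fwv & 0xff;         /* PHY_FWV_MINOR_MASK */

    printf("%s firmware, type %d, minor 0x%02X\n",
           rel ? "release" : "test", type, minor);
    /* type 7 with minor < 0x6D would take the re-aneg workaround above */
    return 0;
}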
DUPLEX_FULL : DUPLEX_HALF; + switch (FIELD_GET(PHY_MIISTAT_SPD_MASK, ret)) { + case PHY_MIISTAT_SPD_10: + phydev->speed = SPEED_10; + break; + case PHY_MIISTAT_SPD_100: + phydev->speed = SPEED_100; + break; + case PHY_MIISTAT_SPD_1000: + phydev->speed = SPEED_1000; + break; + case PHY_MIISTAT_SPD_2500: + phydev->speed = SPEED_2500; + break; + } + + if (phydev->link) + gpy_update_interface(phydev); + + return 0; +} + +static int gpy_config_intr(struct phy_device *phydev) +{ + u16 mask = 0; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + mask = PHY_IMASK_MASK; + + return phy_write(phydev, PHY_IMASK, mask); +} + +static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) +{ + int reg; + + reg = phy_read(phydev, PHY_ISTAT); + if (reg < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(reg & PHY_IMASK_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + +static int gpy_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + struct net_device *attach_dev = phydev->attached_dev; + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* MAC address - Byte0:Byte1:Byte2:Byte3:Byte4:Byte5 + * VPSPEC2_WOL_AD45 = Byte0:Byte1 + * VPSPEC2_WOL_AD23 = Byte2:Byte3 + * VPSPEC2_WOL_AD01 = Byte4:Byte5 + */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD45, + ((attach_dev->dev_addr[0] << 8) | + attach_dev->dev_addr[1])); + if (ret < 0) + return ret; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD23, + ((attach_dev->dev_addr[2] << 8) | + attach_dev->dev_addr[3])); + if (ret < 0) + return ret; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD01, + ((attach_dev->dev_addr[4] << 8) | + attach_dev->dev_addr[5])); + if (ret < 0) + return ret; + + /* Enable the WOL interrupt */ + ret = phy_write(phydev, PHY_IMASK, PHY_IMASK_WOL); + if (ret < 0) + return ret; + + /* Enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_CTL, + WOL_EN); + if (ret < 0) + return ret; + + /* Clear the interrupt status register. + * Only WoL is enabled so clear all. + */ + ret = phy_read(phydev, PHY_ISTAT); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching */ + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_CTL, + WOL_EN); + if (ret < 0) + return ret; + } + + if (wol->wolopts & WAKE_PHY) { + /* Enable the link state change interrupt */ + ret = phy_set_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); + if (ret < 0) + return ret; + + /* Clear the interrupt status register */ + ret = phy_read(phydev, PHY_ISTAT); + if (ret < 0) + return ret; + + if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC)) + phy_trigger_machine(phydev); + + return 0; + } + + /* Disable the link state change interrupt */ + return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); +} + +static void gpy_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC | WAKE_PHY; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL); + if (ret & WOL_EN) + wol->wolopts |= WAKE_MAGIC; + + ret = phy_read(phydev, PHY_IMASK); + if (ret & PHY_IMASK_LSTC) + wol->wolopts |= WAKE_PHY; +} + +static int gpy_loopback(struct phy_device *phydev, bool enable) +{ + int ret; + + ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, + enable ? BMCR_LOOPBACK : 0); + if (!ret) { + /* It takes some time for PHY device to switch + * into/out-of loopback mode. 
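gpy_read_status() above derives link, duplex and speed from a single PHY_MIISTAT read: bit 10 is link status, bit 3 duplex, and bits 2:0 a speed code (0/1/2/4 for 10/100/1000/2500 Mbps). A user-space rendition of the decode with a made-up register value:

#include <stdio.h>

int main(void)
{
    unsigned int miistat = (1 << 10) | (1 << 3) | 4; /* link up, full, 2.5G */
    static const int spd[8] = { 10, 100, 1000, -1, 2500, -1, -1, -1 };

    printf("link=%u duplex=%s speed=%d Mbps\n",
           (miistat >> 10) & 1,                  /* PHY_MIISTAT_LS  */
           (miistat >> 3) & 1 ? "full" : "half", /* PHY_MIISTAT_DPX */
           spd[miistat & 7]);                    /* PHY_MIISTAT_SPD_MASK */
    return 0;
}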
+ */ + msleep(100); + } + + return ret; +} + +static struct phy_driver gpy_drivers[] = { + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx), + .name = "Maxlinear Ethernet GPY2xx", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY115B, + .phy_id_mask = PHY_ID_GPYx15B_MASK, + .name = "Maxlinear Ethernet GPY115B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY115C), + .name = "Maxlinear Ethernet GPY115C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY211B, + .phy_id_mask = PHY_ID_GPY21xB_MASK, + .name = "Maxlinear Ethernet GPY211B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY211C), + .name = "Maxlinear Ethernet GPY211C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY212B, + .phy_id_mask = PHY_ID_GPY21xB_MASK, + .name = "Maxlinear Ethernet GPY212B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY212C), + .name = "Maxlinear Ethernet GPY212C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = 
genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY215B, + .phy_id_mask = PHY_ID_GPYx15B_MASK, + .name = "Maxlinear Ethernet GPY215B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY215C), + .name = "Maxlinear Ethernet GPY215C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY241B), + .name = "Maxlinear Ethernet GPY241B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM), + .name = "Maxlinear Ethernet GPY241BM", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY245B), + .name = "Maxlinear Ethernet GPY245B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, +}; +module_phy_driver(gpy_drivers); + +static struct mdio_device_id __maybe_unused gpy_tbl[] = { + {PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx)}, + {PHY_ID_GPY115B, PHY_ID_GPYx15B_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY115C)}, + {PHY_ID_GPY211B, PHY_ID_GPY21xB_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY211C)}, + {PHY_ID_GPY212B, PHY_ID_GPY21xB_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY212C)}, + {PHY_ID_GPY215B, PHY_ID_GPYx15B_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY215C)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY241B)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY245B)}, + { } +}; +MODULE_DEVICE_TABLE(mdio, gpy_tbl); + +MODULE_DESCRIPTION("Maxlinear Ethernet GPY Driver"); +MODULE_AUTHOR("Xu Liang"); 
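The twelve gpy_drivers[] entries differ only in ID matching and name; every other member repeats. If the table were to be compressed, a variadic initializer macro along these lines would do it. GPY_PHY_DRIVER is hypothetical and not part of the patch; it only restates the shared members visible above:

#define GPY_PHY_DRIVER(phy_name, ...)                             \
    {                                                             \
        __VA_ARGS__,                                              \
        .name             = phy_name,                             \
        .get_features     = genphy_c45_pma_read_abilities,        \
        .config_init      = gpy_config_init,                      \
        .probe            = gpy_probe,                            \
        .suspend          = genphy_suspend,                       \
        .resume           = genphy_resume,                        \
        .config_aneg      = gpy_config_aneg,                      \
        .aneg_done        = genphy_c45_aneg_done,                 \
        .read_status      = gpy_read_status,                      \
        .config_intr      = gpy_config_intr,                      \
        .handle_interrupt = gpy_handle_interrupt,                 \
        .set_wol          = gpy_set_wol,                          \
        .get_wol          = gpy_get_wol,                          \
        .set_loopback     = gpy_loopback,                         \
    }

/* usage:
 * GPY_PHY_DRIVER("Maxlinear Ethernet GPY115C",
 *                PHY_ID_MATCH_MODEL(PHY_ID_GPY115C)),
 * GPY_PHY_DRIVER("Maxlinear Ethernet GPY115B",
 *                .phy_id = PHY_ID_GPY115B, .phy_id_mask = PHY_ID_GPYx15B_MASK),
 */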
+MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8eeb26d8aeb7..f124a8a58bd4 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -426,7 +426,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_mii_ioctl); /** - * phy_do_ioctl - generic ndo_do_ioctl implementation + * phy_do_ioctl - generic ndo_eth_ioctl implementation * @dev: the net_device struct * @ifr: &struct ifreq for socket ioctl's * @cmd: ioctl cmd to execute @@ -441,7 +441,7 @@ int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_do_ioctl); /** - * phy_do_ioctl_running - generic ndo_do_ioctl implementation but test first + * phy_do_ioctl_running - generic ndo_eth_ioctl implementation but test first * * @dev: the net_device struct * @ifr: &struct ifreq for socket ioctl's diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 5d5f9a9ee768..107aa6d7bc6b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -969,6 +969,20 @@ void phy_device_remove(struct phy_device *phydev) EXPORT_SYMBOL(phy_device_remove); /** + * phy_get_c45_ids - Read 802.3-c45 IDs for phy device. + * @phydev: phy_device structure to read 802.3-c45 IDs + * + * Returns zero on success, %-EIO on bus access error, or %-ENODEV if + * the "devices in package" is invalid. + */ +int phy_get_c45_ids(struct phy_device *phydev) +{ + return get_phy_c45_ids(phydev->mdio.bus, phydev->mdio.addr, + &phydev->c45_ids); +} +EXPORT_SYMBOL(phy_get_c45_ids); + +/** * phy_find_first - finds the first PHY device on the bus * @bus: the target MII bus */ diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index eb29ef53d971..2cdf9f989dec 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -942,10 +942,11 @@ static void phylink_phy_change(struct phy_device *phydev, bool up) phylink_run_resolve(pl); - phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down", + phylink_dbg(pl, "phy link %s %s/%s/%s/%s\n", up ? "up" : "down", phy_modes(phydev->interface), phy_speed_to_str(phydev->speed), - phy_duplex_to_str(phydev->duplex)); + phy_duplex_to_str(phydev->duplex), + phylink_pause_to_str(pl->phy_state.pause)); } static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, @@ -1457,15 +1458,11 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, return phy_ethtool_ksettings_set(pl->phydev, kset); } - linkmode_copy(support, pl->supported); config = pl->link_config; - config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE; - /* Mask out unsupported advertisements, and force the autoneg bit */ + /* Mask out unsupported advertisements */ linkmode_and(config.advertising, kset->link_modes.advertising, - support); - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising, - config.an_enabled); + pl->supported); /* FIXME: should we reject autoneg if phy/mac does not support it? */ switch (kset->base.autoneg) { @@ -1474,7 +1471,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * duplex. */ s = phy_lookup_setting(kset->base.speed, kset->base.duplex, - support, false); + pl->supported, false); if (!s) return -EINVAL; @@ -1515,6 +1512,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* We have ruled out the case with a PHY attached, and the * fixed-link cases. All that is left are in-band links. 
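The phylink hunk above extends the link-up debug message with the negotiated pause state. The mapping from the two pause bits to the printed token would look roughly like this; the flag names are assumptions, only the message shape comes from the patch:

#include <stdio.h>

#define PAUSE_RX 1  /* assumed flag names, not taken from the diff */
#define PAUSE_TX 2

static const char *pause_to_str(int pause)
{
    switch (pause & (PAUSE_RX | PAUSE_TX)) {
    case PAUSE_RX | PAUSE_TX:
        return "rx/tx";
    case PAUSE_RX:
        return "rx";
    case PAUSE_TX:
        return "tx";
    default:
        return "off";
    }
}

int main(void)
{
    printf("phy link up sgmii/1Gbps/Full/%s\n",
           pause_to_str(PAUSE_RX | PAUSE_TX));
    return 0;
}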
*/ + config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE; + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising, + config.an_enabled); + + /* Validate without changing the current supported mask. */ + linkmode_copy(support, pl->supported); if (phylink_validate(pl, support, &config)) return -EINVAL; diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index e26cf91bdec2..82d609401711 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -84,6 +84,7 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n" extra grounds are 18,19,20,21,22,23,24 */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> @@ -150,7 +151,8 @@ static int plip_hard_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); static int plip_open(struct net_device *dev); static int plip_close(struct net_device *dev); -static int plip_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static int plip_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static int plip_preempt(void *handle); static void plip_wakeup(void *handle); @@ -265,7 +267,7 @@ static const struct net_device_ops plip_netdev_ops = { .ndo_open = plip_open, .ndo_stop = plip_close, .ndo_start_xmit = plip_tx_packet, - .ndo_do_ioctl = plip_ioctl, + .ndo_siocdevprivate = plip_siocdevprivate, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -1207,7 +1209,8 @@ plip_wakeup(void *handle) } static int -plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +plip_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) { struct net_local *nl = netdev_priv(dev); struct plipconf *pc = (struct plipconf *) &rq->ifr_ifru; @@ -1215,6 +1218,9 @@ plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (cmd != SIOCDEVPLIP) return -EOPNOTSUPP; + if (in_compat_syscall()) + return -EOPNOTSUPP; + switch(pc->pcmd) { case PLIP_GET_TIMEOUT: pc->trigger = nl->trigger; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 930e49ef15f6..216a9f4e9750 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1452,11 +1452,11 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) } static int -ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ppp_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *addr, int cmd) { struct ppp *ppp = netdev_priv(dev); int err = -EFAULT; - void __user *addr = (void __user *) ifr->ifr_ifru.ifru_data; struct ppp_stats stats; struct ppp_comp_stats cstats; char *vers; @@ -1585,7 +1585,7 @@ static const struct net_device_ops ppp_netdev_ops = { .ndo_init = ppp_dev_init, .ndo_uninit = ppp_dev_uninit, .ndo_start_xmit = ppp_start_xmit, - .ndo_do_ioctl = ppp_net_ioctl, + .ndo_siocdevprivate = ppp_net_siocdevprivate, .ndo_get_stats64 = ppp_get_stats64, .ndo_fill_forward_path = ppp_fill_forward_path, }; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index e88af978f63c..f01c9db01b16 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -78,7 +78,8 @@ struct sb1000_private { /* prototypes for Linux interface */ extern int sb1000_probe(struct net_device *dev); static int sb1000_open(struct net_device *dev); -static int sb1000_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); +static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, 
int cmd); static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t sb1000_interrupt(int irq, void *dev_id); @@ -135,7 +136,7 @@ MODULE_DEVICE_TABLE(pnp, sb1000_pnp_ids); static const struct net_device_ops sb1000_netdev_ops = { .ndo_open = sb1000_open, .ndo_start_xmit = sb1000_start_xmit, - .ndo_do_ioctl = sb1000_dev_ioctl, + .ndo_siocdevprivate = sb1000_siocdevprivate, .ndo_stop = sb1000_close, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -987,7 +988,8 @@ sb1000_open(struct net_device *dev) return 0; /* Always succeed */ } -static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { char* name; unsigned char version[2]; @@ -1011,7 +1013,7 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) stats[2] = dev->stats.rx_packets; stats[3] = dev->stats.rx_errors; stats[4] = dev->stats.rx_dropped; - if(copy_to_user(ifr->ifr_data, stats, sizeof(stats))) + if (copy_to_user(data, stats, sizeof(stats))) return -EFAULT; status = 0; break; @@ -1019,21 +1021,21 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGCMFIRMWARE: /* get firmware version */ if ((status = sb1000_get_firmware_version(ioaddr, name, version, 1))) return status; - if(copy_to_user(ifr->ifr_data, version, sizeof(version))) + if (copy_to_user(data, version, sizeof(version))) return -EFAULT; break; case SIOCGCMFREQUENCY: /* get frequency */ if ((status = sb1000_get_frequency(ioaddr, name, &frequency))) return status; - if(put_user(frequency, (int __user *) ifr->ifr_data)) + if (put_user(frequency, (int __user *)data)) return -EFAULT; break; case SIOCSCMFREQUENCY: /* set frequency */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if(get_user(frequency, (int __user *) ifr->ifr_data)) + if (get_user(frequency, (int __user *)data)) return -EFAULT; if ((status = sb1000_set_frequency(ioaddr, name, frequency))) return status; @@ -1042,14 +1044,14 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGCMPIDS: /* get PIDs */ if ((status = sb1000_get_PIDs(ioaddr, name, PID))) return status; - if(copy_to_user(ifr->ifr_data, PID, sizeof(PID))) + if (copy_to_user(data, PID, sizeof(PID))) return -EFAULT; break; case SIOCSCMPIDS: /* set PIDs */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if(copy_from_user(PID, ifr->ifr_data, sizeof(PID))) + if (copy_from_user(PID, data, sizeof(PID))) return -EFAULT; if ((status = sb1000_set_PIDs(ioaddr, name, PID))) return status; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index dc84cb844319..5435b5689ce6 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -62,6 +62,7 @@ */ #define SL_CHECK_TRANSMIT +#include <linux/compat.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -108,7 +109,7 @@ static void slip_unesc6(struct slip *sl, unsigned char c); #ifdef CONFIG_SLIP_SMART static void sl_keepalive(struct timer_list *t); static void sl_outfill(struct timer_list *t); -static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd); #endif /******************************** @@ -647,7 +648,7 @@ static const struct net_device_ops sl_netdev_ops = { .ndo_change_mtu = sl_change_mtu, .ndo_tx_timeout = sl_tx_timeout, #ifdef CONFIG_SLIP_SMART - .ndo_do_ioctl = 
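The plip, sb1000 and slip hunks in this stretch (and the pegasus and rtl8150 ones further down) share one conversion shape: the private-ioctl handler moves from .ndo_do_ioctl to .ndo_siocdevprivate, gains a void __user *data argument, and handlers whose struct ifreq payload is not compat-safe reject compat callers up front. A sketch of that shape; "foo" and SIOCDEVFOO are placeholders:

#include <linux/compat.h>
#include <linux/netdevice.h>
#include <linux/sockios.h>

#define SIOCDEVFOO (SIOCDEVPRIVATE + 0)  /* hypothetical private cmd */

static int foo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
                              void __user *data, int cmd)
{
    if (cmd != SIOCDEVFOO)
        return -EOPNOTSUPP;

    /* the layout of rq->ifr_ifru differs between 32/64-bit ABIs here */
    if (in_compat_syscall())
        return -EOPNOTSUPP;

    /* ... interpret rq->ifr_ifru, or copy_{from,to}_user() via data ... */
    return 0;
}

static const struct net_device_ops foo_netdev_ops = {
    .ndo_siocdevprivate = foo_siocdevprivate,
};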
sl_ioctl, + .ndo_siocdevprivate = sl_siocdevprivate, #endif }; @@ -1179,11 +1180,12 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file, /* VSV changes start here */ #ifdef CONFIG_SLIP_SMART -/* function do_ioctl called from net/core/dev.c +/* function sl_siocdevprivate called from net/core/dev.c to allow get/set outfill/keepalive parameter by ifconfig */ -static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) { struct slip *sl = netdev_priv(dev); unsigned long *p = (unsigned long *)&rq->ifr_ifru; @@ -1191,6 +1193,9 @@ static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (sl == NULL) /* Allocation failed ?? */ return -ENODEV; + if (in_compat_syscall()) + return -EOPNOTSUPP; + spin_lock_bh(&sl->lock); if (!sl->tty) { diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 2c115216420a..cb01897c7a5d 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -197,7 +197,7 @@ static const struct net_device_ops ax88172_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = asix_ioctl, + .ndo_eth_ioctl = asix_ioctl, .ndo_set_rx_mode = ax88172_set_multicast, }; @@ -589,7 +589,7 @@ static const struct net_device_ops ax88772_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = asix_set_multicast, }; @@ -714,7 +714,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) u8 buf[ETH_ALEN] = {0}, chipcode = 0; struct asix_common_private *priv; int ret, i; - u32 phyid; usbnet_get_endpoints(dev, intf); @@ -762,10 +761,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) return ret; } - /* Read PHYID register *AFTER* the PHY was reset properly */ - phyid = asix_get_phyid(dev); - netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid); - /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */ if (dev->driver_info->flags & FLAG_FRAMING_AX) { /* hard_mtu is still the default - the device does not support @@ -1100,7 +1095,7 @@ static const struct net_device_ops ax88178_netdev_ops = { .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = asix_set_multicast, - .ndo_do_ioctl = asix_ioctl, + .ndo_eth_ioctl = asix_ioctl, .ndo_change_mtu = ax88178_change_mtu, }; @@ -1215,6 +1210,7 @@ static const struct driver_info ax88772b_info = { .unbind = ax88772_unbind, .status = asix_status, .reset = ax88772_reset, + .stop = ax88772_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 530947d7477b..d9777d9a7c5d 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -109,7 +109,7 @@ static const struct net_device_ops ax88172a_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = asix_set_multicast, }; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index c1316718304d..f25448a08870 100644 
--- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1035,7 +1035,7 @@ static const struct net_device_ops ax88179_netdev_ops = { .ndo_change_mtu = ax88179_change_mtu, .ndo_set_mac_address = ax88179_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ax88179_ioctl, + .ndo_eth_ioctl = ax88179_ioctl, .ndo_set_rx_mode = ax88179_set_multicast, .ndo_set_features = ax88179_set_features, }; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 8d1f69dad603..e1da9102a540 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -253,7 +253,8 @@ static int usbpn_close(struct net_device *dev) return usb_set_interface(pnd->usb, num, !pnd->active_setting); } -static int usbpn_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int usbpn_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { struct if_phonet_req *req = (struct if_phonet_req *)ifr; @@ -269,7 +270,7 @@ static const struct net_device_ops usbpn_ops = { .ndo_open = usbpn_open, .ndo_stop = usbpn_close, .ndo_start_xmit = usbpn_xmit, - .ndo_do_ioctl = usbpn_ioctl, + .ndo_siocdevprivate = usbpn_siocdevprivate, }; static void usbpn_setup(struct net_device *dev) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 89cc61d7a675..907f98b1eefe 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -345,7 +345,7 @@ static const struct net_device_ops dm9601_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = dm9601_ioctl, + .ndo_eth_ioctl = dm9601_ioctl, .ndo_set_rx_mode = dm9601_set_multicast, .ndo_set_mac_address = dm9601_set_mac_address, }; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index dec96e8ab567..827d574f764a 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2353,7 +2353,7 @@ static int remove_net_device(struct hso_device *hso_dev) } /* Frees our network device */ -static void hso_free_net_device(struct hso_device *hso_dev, bool bailout) +static void hso_free_net_device(struct hso_device *hso_dev) { int i; struct hso_net *hso_net = dev2net(hso_dev); @@ -2376,7 +2376,7 @@ static void hso_free_net_device(struct hso_device *hso_dev, bool bailout) kfree(hso_net->mux_bulk_tx_buf); hso_net->mux_bulk_tx_buf = NULL; - if (hso_net->net && !bailout) + if (hso_net->net) free_netdev(hso_net->net); kfree(hso_dev); @@ -3133,7 +3133,7 @@ static void hso_free_interface(struct usb_interface *interface) rfkill_unregister(rfk); rfkill_destroy(rfk); } - hso_free_net_device(network_table[i], false); + hso_free_net_device(network_table[i]); } } } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 207e59e74935..06e2181e5810 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf, netdev->netdev_ops = &ipheth_netdev_ops; netdev->watchdog_timeo = IPHETH_TX_TIMEOUT; - strcpy(netdev->name, "eth%d"); + strscpy(netdev->name, "eth%d", sizeof(netdev->name)); dev = netdev_priv(netdev); dev->udev = udev; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6d092d78e0cb..4e8d3c28f73e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3609,7 +3609,7 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_change_mtu = lan78xx_change_mtu, .ndo_set_mac_address = lan78xx_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - 
.ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = lan78xx_set_multicast, .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 2469bdcb1a04..66866bef25df 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -464,7 +464,7 @@ static const struct net_device_ops mcs7830_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mcs7830_ioctl, + .ndo_eth_ioctl = mcs7830_ioctl, .ndo_set_rx_mode = mcs7830_set_multicast, .ndo_set_mac_address = mcs7830_set_mac_address, }; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 652e9fcf0b77..36dafcb3d04a 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1001,7 +1001,8 @@ static const struct ethtool_ops ops = { .set_link_ksettings = pegasus_set_link_ksettings, }; -static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd) +static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq, + void __user *udata, int cmd) { __u16 *data = (__u16 *) &rq->ifr_ifru; pegasus_t *pegasus = netdev_priv(net); @@ -1269,7 +1270,7 @@ static int pegasus_resume(struct usb_interface *intf) static const struct net_device_ops pegasus_netdev_ops = { .ndo_open = pegasus_open, .ndo_stop = pegasus_close, - .ndo_do_ioctl = pegasus_ioctl, + .ndo_siocdevprivate = pegasus_siocdevprivate, .ndo_start_xmit = pegasus_start_xmit, .ndo_set_rx_mode = pegasus_set_multicast, .ndo_tx_timeout = pegasus_tx_timeout, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e09b107b5c99..d7fbc81b518a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9173,7 +9173,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops rtl8152_netdev_ops = { .ndo_open = rtl8152_open, .ndo_stop = rtl8152_close, - .ndo_do_ioctl = rtl8152_ioctl, + .ndo_eth_ioctl = rtl8152_ioctl, .ndo_start_xmit = rtl8152_start_xmit, .ndo_tx_timeout = rtl8152_tx_timeout, .ndo_set_features = rtl8152_set_features, diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 7656f2a3afd9..4a1b0e0fc3a3 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -822,7 +822,8 @@ static const struct ethtool_ops ops = { .get_link_ksettings = rtl8150_get_link_ksettings, }; -static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) +static int rtl8150_siocdevprivate(struct net_device *netdev, struct ifreq *rq, + void __user *udata, int cmd) { rtl8150_t *dev = netdev_priv(netdev); u16 *data = (u16 *) & rq->ifr_ifru; @@ -850,7 +851,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static const struct net_device_ops rtl8150_netdev_ops = { .ndo_open = rtl8150_open, .ndo_stop = rtl8150_close, - .ndo_do_ioctl = rtl8150_ioctl, + .ndo_siocdevprivate = rtl8150_siocdevprivate, .ndo_start_xmit = rtl8150_start_xmit, .ndo_tx_timeout = rtl8150_tx_timeout, .ndo_set_rx_mode = rtl8150_set_multicast, diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 13141dbfa3a8..76f7af161313 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1439,7 +1439,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = { .ndo_change_mtu = smsc75xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - 
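The other recurring conversion here goes the opposite way: USB ethernet drivers whose handler only services the MII ioctls (asix, ax88179, dm9601, lan78xx, mcs7830, r8152 and the smsc/sr parts below) move from .ndo_do_ioctl to the new .ndo_eth_ioctl hook, and those with a phylib-attached PHY can point it straight at the stock helper. A sketch with a placeholder driver name:

#include <linux/netdevice.h>
#include <linux/phy.h>

/* "bar" is a placeholder; only the ops wiring mirrors the diff */
static netdev_tx_t bar_start_xmit(struct sk_buff *skb, struct net_device *dev);

static const struct net_device_ops bar_netdev_ops = {
    .ndo_start_xmit = bar_start_xmit,
    /* SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG forwarded to the attached
     * PHY, but only while the interface is running */
    .ndo_eth_ioctl  = phy_do_ioctl_running,
};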
.ndo_do_ioctl = smsc75xx_ioctl, + .ndo_eth_ioctl = smsc75xx_ioctl, .ndo_set_rx_mode = smsc75xx_set_multicast, .ndo_set_features = smsc75xx_set_features, }; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 4c8ee1cff4d4..7d953974eb9b 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1044,7 +1044,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = smsc95xx_ioctl, + .ndo_eth_ioctl = smsc95xx_ioctl, .ndo_set_rx_mode = smsc95xx_set_multicast, .ndo_set_features = smsc95xx_set_features, }; diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index ce29261263cd..6516a37893e2 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -310,7 +310,7 @@ static const struct net_device_ops sr9700_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = sr9700_ioctl, + .ndo_eth_ioctl = sr9700_ioctl, .ndo_set_rx_mode = sr9700_set_multicast, .ndo_set_mac_address = sr9700_set_mac_address, }; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index a822d81310d5..576401c8b1be 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -684,7 +684,7 @@ static const struct net_device_ops sr9800_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = sr_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = sr_ioctl, + .ndo_eth_ioctl = sr_ioctl, .ndo_set_rx_mode = sr_set_multicast, }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 470e1c1e6353..840c1c2ab16a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->interrupt_count = 0; dev->net = net; - strcpy (net->name, "usb%d"); + strscpy(net->name, "usb%d", sizeof(net->name)); memcpy (net->dev_addr, node_id, sizeof node_id); /* rx and tx sides can use different message sizes; @@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_ETHER) != 0 && ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || (net->dev_addr [0] & 0x02) == 0)) - strcpy (net->name, "eth%d"); + strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) - strcpy(net->name, "wlan%d"); + strscpy(net->name, "wlan%d", sizeof(net->name)); /* WWAN devices should always be named "wwan%d" */ if ((dev->driver_info->flags & FLAG_WWAN) != 0) - strcpy(net->name, "wwan%d"); + strscpy(net->name, "wwan%d", sizeof(net->name)); /* devices that cannot do ARP */ if ((dev->driver_info->flags & FLAG_NOARP) != 0) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index bdb7ce3cb054..50eb43e5bf45 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -224,12 +224,13 @@ static void veth_get_channels(struct net_device *dev, { channels->tx_count = dev->real_num_tx_queues; channels->rx_count = dev->real_num_rx_queues; - channels->max_tx = dev->real_num_tx_queues; - channels->max_rx = dev->real_num_rx_queues; - channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues); - channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues); + channels->max_tx = dev->num_tx_queues; + channels->max_rx = 
dev->num_rx_queues; } +static int veth_set_channels(struct net_device *dev, + struct ethtool_channels *ch); + static const struct ethtool_ops veth_ethtool_ops = { .get_drvinfo = veth_get_drvinfo, .get_link = ethtool_op_get_link, @@ -239,6 +240,7 @@ static const struct ethtool_ops veth_ethtool_ops = { .get_link_ksettings = veth_get_link_ksettings, .get_ts_info = ethtool_op_get_ts_info, .get_channels = veth_get_channels, + .set_channels = veth_set_channels, }; /* general routines */ @@ -711,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, int mac_len, delta, off; struct xdp_buff xdp; - skb_orphan_partial(skb); + skb_prepare_for_gro(skb); rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -928,12 +930,12 @@ static int veth_poll(struct napi_struct *napi, int budget) return done; } -static int __veth_napi_enable(struct net_device *dev) +static int __veth_napi_enable_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int err, i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); @@ -941,7 +943,7 @@ static int __veth_napi_enable(struct net_device *dev) goto err_xdp_ring; } - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; napi_enable(&rq->xdp_napi); @@ -949,19 +951,25 @@ static int __veth_napi_enable(struct net_device *dev) } return 0; + err_xdp_ring: - for (i--; i >= 0; i--) + for (i--; i >= start; i--) ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); return err; } -static void veth_napi_del(struct net_device *dev) +static int __veth_napi_enable(struct net_device *dev) +{ + return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); +} + +static void veth_napi_del_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; rcu_assign_pointer(priv->rq[i].napi, NULL); @@ -970,7 +978,7 @@ static void veth_napi_del(struct net_device *dev) } synchronize_net(); - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; rq->rx_notify_masked = false; @@ -978,41 +986,90 @@ static void veth_napi_del(struct net_device *dev) } } +static void veth_napi_del(struct net_device *dev) +{ + veth_napi_del_range(dev, 0, dev->real_num_rx_queues); +} + static bool veth_gro_requested(const struct net_device *dev) { return !!(dev->wanted_features & NETIF_F_GRO); } -static int veth_enable_xdp(struct net_device *dev) +static int veth_enable_xdp_range(struct net_device *dev, int start, int end, + bool napi_already_on) { - bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); struct veth_priv *priv = netdev_priv(dev); int err, i; - if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { - for (i = 0; i < dev->real_num_rx_queues; i++) { - struct veth_rq *rq = &priv->rq[i]; + for (i = start; i < end; i++) { + struct veth_rq *rq = &priv->rq[i]; - if (!napi_already_on) - netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); - err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); - if (err < 0) - goto err_rxq_reg; + if (!napi_already_on) + netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); + err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); + if (err < 0) + 
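The veth refactor running through these hunks is built on one idiom: every helper now works on queues [start, end) and, on failure, unwinds only what the current call created, as in the for (i--; i >= start; i--) loops above. A generic standalone rendition of that init-with-rollback shape; all names and the forced failure are invented:

#include <stdio.h>

static int init_item(int i)  { printf("init %d\n", i); return i == 3 ? -1 : 0; }
static void undo_item(int i) { printf("undo %d\n", i); }

static int init_range(int start, int end)
{
    int i;

    for (i = start; i < end; i++)
        if (init_item(i) < 0)
            goto err;
    return 0;

err:
    for (i--; i >= start; i--)  /* same unwind shape as err_xdp_ring */
        undo_item(i);
    return -1;
}

int main(void)
{
    /* init 0..2 succeed, 3 fails, so exactly 2, 1, 0 are undone */
    return init_range(0, 5) ? 1 : 0;
}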
goto err_rxq_reg; - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (err < 0) - goto err_reg_mem; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err < 0) + goto err_reg_mem; - /* Save original mem info as it can be overwritten */ - rq->xdp_mem = rq->xdp_rxq.mem; - } + /* Save original mem info as it can be overwritten */ + rq->xdp_mem = rq->xdp_rxq.mem; + } + return 0; + +err_reg_mem: + xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); +err_rxq_reg: + for (i--; i >= start; i--) { + struct veth_rq *rq = &priv->rq[i]; + + xdp_rxq_info_unreg(&rq->xdp_rxq); + if (!napi_already_on) + netif_napi_del(&rq->xdp_napi); + } + + return err; +} + +static void veth_disable_xdp_range(struct net_device *dev, int start, int end, + bool delete_napi) +{ + struct veth_priv *priv = netdev_priv(dev); + int i; + + for (i = start; i < end; i++) { + struct veth_rq *rq = &priv->rq[i]; + + rq->xdp_rxq.mem = rq->xdp_mem; + xdp_rxq_info_unreg(&rq->xdp_rxq); + + if (delete_napi) + netif_napi_del(&rq->xdp_napi); + } +} + +static int veth_enable_xdp(struct net_device *dev) +{ + bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); + struct veth_priv *priv = netdev_priv(dev); + int err, i; + + if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { + err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); + if (err) + return err; if (!napi_already_on) { err = __veth_napi_enable(dev); - if (err) - goto err_rxq_reg; + if (err) { + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); + return err; + } if (!veth_gro_requested(dev)) { /* user-space did not require GRO, but adding XDP @@ -1030,18 +1087,6 @@ static int veth_enable_xdp(struct net_device *dev) } return 0; -err_reg_mem: - xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); -err_rxq_reg: - for (i--; i >= 0; i--) { - struct veth_rq *rq = &priv->rq[i]; - - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (!napi_already_on) - netif_napi_del(&rq->xdp_napi); - } - - return err; } static void veth_disable_xdp(struct net_device *dev) @@ -1064,28 +1109,23 @@ static void veth_disable_xdp(struct net_device *dev) } } - for (i = 0; i < dev->real_num_rx_queues; i++) { - struct veth_rq *rq = &priv->rq[i]; - - rq->xdp_rxq.mem = rq->xdp_mem; - xdp_rxq_info_unreg(&rq->xdp_rxq); - } + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } -static int veth_napi_enable(struct net_device *dev) +static int veth_napi_enable_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int err, i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); } - err = __veth_napi_enable(dev); + err = __veth_napi_enable_range(dev, start, end); if (err) { - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; netif_napi_del(&rq->xdp_napi); @@ -1095,6 +1135,128 @@ static int veth_napi_enable(struct net_device *dev) return err; } +static int veth_napi_enable(struct net_device *dev) +{ + return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); +} + +static void veth_disable_range_safe(struct net_device *dev, int start, int end) +{ + struct veth_priv *priv = netdev_priv(dev); + + if (start >= end) + return; + + if (priv->_xdp_prog) { + veth_napi_del_range(dev, start, end); + veth_disable_xdp_range(dev, start, end, false); + } else if (veth_gro_requested(dev)) { + 
veth_napi_del_range(dev, start, end); + } +} + +static int veth_enable_range_safe(struct net_device *dev, int start, int end) +{ + struct veth_priv *priv = netdev_priv(dev); + int err; + + if (start >= end) + return 0; + + if (priv->_xdp_prog) { + /* these channels are freshly initialized, napi is not on there even + * when GRO is requested + */ + err = veth_enable_xdp_range(dev, start, end, false); + if (err) + return err; + + err = __veth_napi_enable_range(dev, start, end); + if (err) { + /* on error always delete the newly added napis */ + veth_disable_xdp_range(dev, start, end, true); + return err; + } + } else if (veth_gro_requested(dev)) { + return veth_napi_enable_range(dev, start, end); + } + return 0; +} + +static int veth_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct veth_priv *priv = netdev_priv(dev); + unsigned int old_rx_count, new_rx_count; + struct veth_priv *peer_priv; + struct net_device *peer; + int err; + + /* sanity check. Upper bounds are already enforced by the caller */ + if (!ch->rx_count || !ch->tx_count) + return -EINVAL; + + /* avoid breaking XDP, if that is enabled */ + peer = rtnl_dereference(priv->peer); + peer_priv = peer ? netdev_priv(peer) : NULL; + if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues) + return -EINVAL; + + if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues) + return -EINVAL; + + old_rx_count = dev->real_num_rx_queues; + new_rx_count = ch->rx_count; + if (netif_running(dev)) { + /* turn device off */ + netif_carrier_off(dev); + if (peer) + netif_carrier_off(peer); + + /* try to allocate new resources, as needed */ + err = veth_enable_range_safe(dev, old_rx_count, new_rx_count); + if (err) + goto out; + } + + err = netif_set_real_num_rx_queues(dev, ch->rx_count); + if (err) + goto revert; + + err = netif_set_real_num_tx_queues(dev, ch->tx_count); + if (err) { + int err2 = netif_set_real_num_rx_queues(dev, old_rx_count); + + /* this error condition could happen only if rx and tx change + * in opposite directions (e.g. 
tx count rises, rx count decreases) + * and we can't do anything to fully restore the original + * status + */ + if (err2) + pr_warn("Can't restore rx queues config %d -> %d %d", + new_rx_count, old_rx_count, err2); + else + goto revert; + } + +out: + if (netif_running(dev)) { + /* note that we need to swap the arguments WRT the enable part + * to identify the range we have to disable + */ + veth_disable_range_safe(dev, new_rx_count, old_rx_count); + netif_carrier_on(dev); + if (peer) + netif_carrier_on(peer); + } + return err; + +revert: + new_rx_count = old_rx_count; + old_rx_count = ch->rx_count; + goto out; +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -1447,6 +1609,23 @@ static void veth_disable_gro(struct net_device *dev) netdev_update_features(dev); } +static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) +{ + int err; + + if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { + err = netif_set_real_num_tx_queues(dev, 1); + if (err) + return err; + } + if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { + err = netif_set_real_num_rx_queues(dev, 1); + if (err) + return err; + } + return 0; +} + static int veth_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1556,13 +1735,21 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, priv = netdev_priv(dev); rcu_assign_pointer(priv->peer, peer); + err = veth_init_queues(dev, tb); + if (err) + goto err_queues; priv = netdev_priv(peer); rcu_assign_pointer(priv->peer, dev); + err = veth_init_queues(peer, tb); + if (err) + goto err_queues; veth_disable_gro(dev); return 0; +err_queues: + unregister_netdevice(dev); err_register_dev: /* nothing to do */ err_configure_peer: @@ -1608,6 +1795,16 @@ static struct net *veth_get_link_net(const struct net_device *dev) return peer ? dev_net(peer) : dev_net(dev); } +static unsigned int veth_get_num_queues(void) +{ + /* enforce the same queue limit as rtnl_create_link */ + int queues = num_possible_cpus(); + + if (queues > 4096) + queues = 4096; + return queues; +} + static struct rtnl_link_ops veth_link_ops = { .kind = DRV_NAME, .priv_size = sizeof(struct veth_priv), @@ -1618,6 +1815,8 @@ static struct rtnl_link_ops veth_link_ops = { .policy = veth_policy, .maxtype = VETH_INFO_MAX, .get_link_net = veth_get_link_net, + .get_num_tx_queues = veth_get_num_queues, + .get_num_rx_queues = veth_get_num_queues, }; /* diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56c3f8519093..2e42210a6503 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, bool hdr_valid, unsigned int metasize, - bool whole_page) + unsigned int headroom) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - /* If whole_page, there is an offset between the beginning of the + /* If headroom is not 0, there is an offset between the beginning of the * data and the allocated space, otherwise the data and the allocated * space are aligned. 
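One subtlety in veth_set_channels() above: the revert path swaps the old and new counts and jumps back to the shared exit, so the single veth_disable_range_safe() call at "out" always tears down whichever range ended up unused. A standalone rendition of that control flow; all names and the forced failure are illustrative:

#include <stdio.h>

static int apply(int n) { return n > 8 ? -1 : 0; } /* fails for the example */

static void disable_range(int start, int end)
{
    if (start < end)
        printf("tear down queues [%d, %d)\n", start, end);
}

int main(void)
{
    int old_cnt = 4, new_cnt = 16; /* current 4 queues, 16 requested */
    int err;

    /* queues [4, 16) were just enabled, as veth_enable_range_safe() does */
    err = apply(new_cnt);
    if (err)
        goto revert;
out:
    disable_range(new_cnt, old_cnt); /* no-op when growing succeeded */
    return err ? 1 : 0;

revert:
    /* swap so that "out" disables the freshly enabled range [4, 16) */
    new_cnt = old_cnt;
    old_cnt = 16; /* the requested count */
    goto out;
}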
* * Buffers with headroom use PAGE_SIZE as alloc size, see * add_recvbuf_mergeable() + get_mergeable_buf_len() */ - if (whole_page) { - /* Buffers with whole_page use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = PAGE_SIZE; - - /* page maybe head page, so we should get the buf by p, not the - * page - */ - tailroom = truesize - len - offset_in_page(p); - buf = (char *)((unsigned long)p & PAGE_MASK); - } else { - tailroom = truesize - len; - buf = p; - } + truesize = headroom ? PAGE_SIZE : truesize; + tailroom = truesize - len - headroom; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; @@ -978,7 +966,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, - metasize, true); + metasize, + VIRTIO_XDP_HEADROOM); return head_skb; } break; @@ -1029,7 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, - metasize, !!headroom); + metasize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -2208,14 +2197,14 @@ static int virtnet_set_channels(struct net_device *dev, if (vi->rq[0].xdp_prog) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); err = _virtnet_set_queues(vi, queue_pairs); if (err) { - put_online_cpus(); + cpus_read_unlock(); goto err; } virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); @@ -2970,9 +2959,9 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; - get_online_cpus(); + cpus_read_lock(); virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); return 0; diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index c5a167a1c85c..7a38925f4165 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index 8c014c98471c..f9f3a23d1698 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index a8d5ebd47c71..74d4e8bc4abc 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -98,6 +98,9 @@ enum { VMXNET3_CMD_GET_TXDATA_DESC_SIZE, VMXNET3_CMD_GET_COALESCE, VMXNET3_CMD_GET_RSS_FIELDS, + VMXNET3_CMD_GET_RESERVED2, + VMXNET3_CMD_GET_RESERVED3, + VMXNET3_CMD_GET_MAX_QUEUES_CONF, }; /* @@ -341,13 +344,15 @@ struct Vmxnet3_RxCompDescExt { #define VMXNET3_TXD_EOP_SIZE 1 /* value of RxCompDesc.rssType */ -enum { - VMXNET3_RCD_RSS_TYPE_NONE = 0, - VMXNET3_RCD_RSS_TYPE_IPV4 = 1, - VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2, - VMXNET3_RCD_RSS_TYPE_IPV6 = 3, - VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4, -}; +#define VMXNET3_RCD_RSS_TYPE_NONE 0 +#define VMXNET3_RCD_RSS_TYPE_IPV4 1 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV4 2 +#define VMXNET3_RCD_RSS_TYPE_IPV6 3 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV6 4 +#define VMXNET3_RCD_RSS_TYPE_UDPIPV4 5 +#define VMXNET3_RCD_RSS_TYPE_UDPIPV6 6 +#define VMXNET3_RCD_RSS_TYPE_ESPIPV4 7 +#define VMXNET3_RCD_RSS_TYPE_ESPIPV6 8 /* a union for accessing all cmd/completion descriptors */ @@ -533,6 +538,13 @@ enum vmxnet3_intr_type { /* addition 1 for events */ #define VMXNET3_MAX_INTRS 25 +/* Version 6 and later will use below macros */ +#define VMXNET3_EXT_MAX_TX_QUEUES 32 +#define VMXNET3_EXT_MAX_RX_QUEUES 32 +/* addition 1 for events */ +#define VMXNET3_EXT_MAX_INTRS 65 +#define VMXNET3_FIRST_SET_INTRS 64 + /* value of intrCtrl */ #define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */ @@ -547,6 +559,19 @@ struct Vmxnet3_IntrConf { __le32 reserved[2]; }; +struct Vmxnet3_IntrConfExt { + u8 autoMask; + u8 numIntrs; /* # of interrupts */ + u8 eventIntrIdx; + u8 reserved; + __le32 intrCtrl; + __le32 reserved1; + u8 modLevels[VMXNET3_EXT_MAX_INTRS]; /* moderation level for + * each intr + */ + u8 reserved2[3]; +}; + /* one bit per VLAN ID, the size is in the units of u32 */ #define VMXNET3_VFT_SIZE (4096 / (sizeof(u32) * 8)) @@ -719,11 +744,16 @@ struct Vmxnet3_DSDevRead { struct Vmxnet3_VariableLenConfDesc pluginConfDesc; }; +struct Vmxnet3_DSDevReadExt { + /* read-only region for device, read by dev in response to a SET cmd */ + struct Vmxnet3_IntrConfExt intrConfExt; +}; + /* All structures in DriverShared are padded to multiples of 8 bytes */ struct Vmxnet3_DriverShared { __le32 magic; /* make devRead start at 64bit boundaries */ - __le32 pad; + __le32 size; /* size of DriverShared */ struct Vmxnet3_DSDevRead devRead; __le32 ecr; __le32 reserved; @@ -734,6 +764,7 @@ struct Vmxnet3_DriverShared { * command */ } cu; + struct Vmxnet3_DSDevReadExt devReadExt; }; @@ -764,6 +795,7 @@ struct Vmxnet3_DriverShared { ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0) #define VMXNET3_MAX_MTU 9000 +#define VMXNET3_V6_MAX_MTU 9190 #define VMXNET3_MIN_MTU 60 #define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6e87f1fc4874..e3c6b7e3bfdd 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. 
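The new vmxnet3 definitions above (revision 5/6 bits, VMXNET3_CMD_GET_MAX_QUEUES_CONF, the extended interrupt block, and the DriverShared size field) back the jump to 32 Tx/Rx queues. Judging from the probe and remove hunks further down, the 32-bit reply to GET_MAX_QUEUES_CONF packs the Tx limit in byte 0 and the Rx limit in byte 1, with zero meaning "no information, fall back to defaults"; a sketch with an illustrative helper name:

static void example_unpack_max_queues(u32 queues, u32 *max_tx, u32 *max_rx)
{
	*max_tx = queues & 0xff;	/* byte 0: max Tx queues */
	*max_rx = (queues >> 8) & 0xff;	/* byte 1: max Rx queues */
}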
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -1478,10 +1478,28 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, #ifdef VMXNET3_RSS if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && - (adapter->netdev->features & NETIF_F_RXHASH)) + (adapter->netdev->features & NETIF_F_RXHASH)) { + enum pkt_hash_types hash_type; + + switch (rcd->rssType) { + case VMXNET3_RCD_RSS_TYPE_IPV4: + case VMXNET3_RCD_RSS_TYPE_IPV6: + hash_type = PKT_HASH_TYPE_L3; + break; + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: + hash_type = PKT_HASH_TYPE_L4; + break; + default: + hash_type = PKT_HASH_TYPE_L3; + break; + } skb_set_hash(ctx->skb, le32_to_cpu(rcd->rssHash), - PKT_HASH_TYPE_L3); + hash_type); + } #endif skb_put(ctx->skb, rcd->len); @@ -2460,6 +2478,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) { struct Vmxnet3_DriverShared *shared = adapter->shared; struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt; struct Vmxnet3_TxQueueConf *tqc; struct Vmxnet3_RxQueueConf *rqc; int i; @@ -2572,14 +2591,26 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) #endif /* VMXNET3_RSS */ /* intr settings */ - devRead->intrConf.autoMask = adapter->intr.mask_mode == - VMXNET3_IMM_AUTO; - devRead->intrConf.numIntrs = adapter->intr.num_intrs; - for (i = 0; i < adapter->intr.num_intrs; i++) - devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + devRead->intrConf.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devRead->intrConf.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + + devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; + devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + devReadExt->intrConfExt.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devReadExt->intrConfExt.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devReadExt->intrConfExt.modLevels[i] = adapter->intr.mod_levels[i]; - devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; - devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + devReadExt->intrConfExt.eventIntrIdx = adapter->intr.event_intr_idx; + devReadExt->intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } /* rx filter settings */ devRead->rxFilterConf.rxMode = 0; @@ -2717,6 +2748,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) * tx queue if the link is up. 
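Two things happen in the vmxnet3_drv.c hunks above: the RSS switch starts reporting PKT_HASH_TYPE_L4 for TCP/UDP hash types, letting the stack reuse rcd->rssHash for flow steering instead of recomputing it, and vmxnet3_setup_driver_shared() fills either the legacy intrConf or the new intrConfExt block. The branch condition reduces to the predicate below (helper name illustrative):

static bool example_use_intr_conf_ext(const struct vmxnet3_adapter *adapter)
{
	/* devReadExt is only meaningful on a rev. 6 device that actually
	 * enabled the extended queue configuration
	 */
	return VMXNET3_VERSION_GE_6(adapter) && adapter->queuesExtEnabled;
}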
*/ vmxnet3_check_link(adapter, true); + netif_tx_wake_all_queues(adapter->netdev); for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); vmxnet3_enable_all_intrs(adapter); @@ -3372,6 +3404,8 @@ vmxnet3_probe_device(struct pci_dev *pdev, int size; int num_tx_queues; int num_rx_queues; + int queues; + unsigned long flags; if (!pci_msi_enabled()) enable_mq = 0; @@ -3383,7 +3417,6 @@ vmxnet3_probe_device(struct pci_dev *pdev, else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); if (enable_mq) num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES, @@ -3391,13 +3424,8 @@ vmxnet3_probe_device(struct pci_dev *pdev, else num_tx_queues = 1; - num_tx_queues = rounddown_pow_of_two(num_tx_queues); netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter), max(num_tx_queues, num_rx_queues)); - dev_info(&pdev->dev, - "# of Tx queues : %d, # of Rx queues : %d\n", - num_tx_queues, num_rx_queues); - if (!netdev) return -ENOMEM; @@ -3447,51 +3475,22 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_shared; } - adapter->num_rx_queues = num_rx_queues; - adapter->num_tx_queues = num_tx_queues; - adapter->rx_buf_per_pkt = 1; - - size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; - size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; - adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size, - &adapter->queue_desc_pa, - GFP_KERNEL); - - if (!adapter->tqd_start) { - dev_err(&pdev->dev, "Failed to allocate memory\n"); - err = -ENOMEM; - goto err_alloc_queue_desc; - } - adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + - adapter->num_tx_queues); - - adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev, - sizeof(struct Vmxnet3_PMConf), - &adapter->pm_conf_pa, - GFP_KERNEL); - if (adapter->pm_conf == NULL) { - err = -ENOMEM; - goto err_alloc_pm; - } - -#ifdef VMXNET3_RSS - - adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev, - sizeof(struct UPT1_RSSConf), - &adapter->rss_conf_pa, - GFP_KERNEL); - if (adapter->rss_conf == NULL) { - err = -ENOMEM; - goto err_alloc_rss; - } -#endif /* VMXNET3_RSS */ - err = vmxnet3_alloc_pci_resources(adapter); if (err < 0) goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_4)) { + if (ver & (1 << VMXNET3_REV_6)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_6); + adapter->version = VMXNET3_REV_6 + 1; + } else if (ver & (1 << VMXNET3_REV_5)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_5); + adapter->version = VMXNET3_REV_5 + 1; + } else if (ver & (1 << VMXNET3_REV_4)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_4); @@ -3529,6 +3528,77 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_ver; } + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (queues > 0) { + adapter->num_rx_queues = min(num_rx_queues, ((queues >> 8) & 0xff)); + adapter->num_tx_queues = min(num_tx_queues, (queues & 0xff)); + } else { + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + if (adapter->num_rx_queues > VMXNET3_MAX_RX_QUEUES || + adapter->num_tx_queues > 
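The probe path above extends the usual vmxnet3 negotiation ladder to revisions 5 and 6: VMXNET3_REG_VRRS reads back as a bitmask of revisions the device supports, and the driver acknowledges the highest one it also understands by writing that single bit back. A generic sketch of the pattern, not the driver's literal code:

static u32 example_pick_revision(u32 vrrs, u32 driver_max_bit)
{
	u32 bit;

	for (bit = driver_max_bit; bit; bit >>= 1)
		if (vrrs & bit)
			return bit;	/* the bit to write back to VRRS */
	return 0;			/* no mutually supported revision */
}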
VMXNET3_MAX_TX_QUEUES) { + adapter->queuesExtEnabled = true; + } else { + adapter->queuesExtEnabled = false; + } + } else { + adapter->queuesExtEnabled = false; + num_rx_queues = rounddown_pow_of_two(num_rx_queues); + num_tx_queues = rounddown_pow_of_two(num_tx_queues); + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + dev_info(&pdev->dev, + "# of Tx queues : %d, # of Rx queues : %d\n", + adapter->num_tx_queues, adapter->num_rx_queues); + + adapter->rx_buf_per_pkt = 1; + + size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; + size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; + adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size, + &adapter->queue_desc_pa, + GFP_KERNEL); + + if (!adapter->tqd_start) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + err = -ENOMEM; + goto err_ver; + } + adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + + adapter->num_tx_queues); + + adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_PMConf), + &adapter->pm_conf_pa, + GFP_KERNEL); + if (adapter->pm_conf == NULL) { + err = -ENOMEM; + goto err_alloc_pm; + } + +#ifdef VMXNET3_RSS + + adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct UPT1_RSSConf), + &adapter->rss_conf_pa, + GFP_KERNEL); + if (adapter->rss_conf == NULL) { + err = -ENOMEM; + goto err_alloc_rss; + } +#endif /* VMXNET3_RSS */ + if (VMXNET3_VERSION_GE_3(adapter)) { adapter->coal_conf = dma_alloc_coherent(&adapter->pdev->dev, @@ -3538,7 +3608,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, GFP_KERNEL); if (!adapter->coal_conf) { err = -ENOMEM; - goto err_ver; + goto err_coal_conf; } adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED; adapter->default_coal_mode = true; @@ -3581,9 +3651,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, vmxnet3_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - /* MTU range: 60 - 9000 */ + /* MTU range: 60 - 9190 */ netdev->min_mtu = VMXNET3_MIN_MTU; - netdev->max_mtu = VMXNET3_MAX_MTU; + if (VMXNET3_VERSION_GE_6(adapter)) + netdev->max_mtu = VMXNET3_V6_MAX_MTU; + else + netdev->max_mtu = VMXNET3_MAX_MTU; INIT_WORK(&adapter->work, vmxnet3_reset_work); set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); @@ -3621,9 +3694,7 @@ err_register: adapter->coal_conf, adapter->coal_conf_pa); } vmxnet3_free_intr_resources(adapter); -err_ver: - vmxnet3_free_pci_resources(adapter); -err_alloc_pci: +err_coal_conf: #ifdef VMXNET3_RSS dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf), adapter->rss_conf, adapter->rss_conf_pa); @@ -3634,7 +3705,9 @@ err_alloc_rss: err_alloc_pm: dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start, adapter->queue_desc_pa); -err_alloc_queue_desc: +err_ver: + vmxnet3_free_pci_resources(adapter); +err_alloc_pci: dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_DriverShared), adapter->shared, adapter->shared_pa); @@ -3653,7 +3726,8 @@ vmxnet3_remove_device(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct vmxnet3_adapter *adapter = netdev_priv(netdev); int size = 0; - int num_rx_queues; + int num_rx_queues, rx_queues; + unsigned long flags; #ifdef VMXNET3_RSS if (enable_mq) @@ -3662,7 +3736,24 @@ vmxnet3_remove_device(struct pci_dev *pdev) else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); + if (!VMXNET3_VERSION_GE_6(adapter)) { + 
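Because the queue-descriptor, PM, and RSS allocations now run after PCI setup and version negotiation (their sizes depend on the negotiated queue counts), the error labels are re-threaded to match: those allocations now fail toward err_ver/err_coal_conf instead of the old err_alloc_* chain. A toy illustration of the invariant being preserved, with hypothetical stub names:

int acquire_a(void), acquire_b(void), acquire_c(void);	/* stubs */
void release_a(void), release_b(void);

static int example_probe(void)
{
	if (acquire_a())		/* e.g. PCI resources */
		return -1;
	if (acquire_b())		/* e.g. queue descriptors */
		goto undo_a;
	if (acquire_c())		/* e.g. RSS configuration */
		goto undo_b;
	return 0;

undo_b:					/* unwind in reverse order */
	release_b();
undo_a:
	release_a();
	return -1;
}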
num_rx_queues = rounddown_pow_of_two(num_rx_queues); + } + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + rx_queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (rx_queues > 0) + rx_queues = (rx_queues >> 8) & 0xff; + else + rx_queues = min(num_rx_queues, VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + num_rx_queues = min(num_rx_queues, rx_queues); + } else { + num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + } cancel_work_sync(&adapter->work); diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 1b483cf2b1ca..a3e2f2ba68b5 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -787,6 +787,10 @@ vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter, case AH_ESP_V6_FLOW: case AH_V6_FLOW: case ESP_V6_FLOW: + if (VMXNET3_VERSION_GE_6(adapter) && + (rss_fields & VMXNET3_RSS_FIELDS_ESPIP6)) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + fallthrough; case SCTP_V6_FLOW: case IPV6_FLOW: info->data |= RXH_IP_SRC | RXH_IP_DST; @@ -871,6 +875,22 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev, case ESP_V6_FLOW: case AH_V6_FLOW: case AH_ESP_V6_FLOW: + if (!VMXNET3_VERSION_GE_6(adapter)) + return -EOPNOTSUPP; + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP6; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_fields |= VMXNET3_RSS_FIELDS_ESPIP6; + break; + default: + return -EINVAL; + } + break; case SCTP_V4_FLOW: case SCTP_V6_FLOW: if (!(nfc->data & RXH_IP_SRC) || diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index e910596b79cf..7027ff483fa5 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -69,18 +69,20 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.5.0.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01050000 +#define VMXNET3_DRIVER_VERSION_NUM 0x01060000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ #define VMXNET3_RSS #endif +#define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */ +#define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */ #define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ #define VMXNET3_REV_3 2 /* Vmxnet3 Rev. 3 */ #define VMXNET3_REV_2 1 /* Vmxnet3 Rev. 
2 */ @@ -301,15 +303,18 @@ struct vmxnet3_rx_queue { struct vmxnet3_rq_driver_stats stats; } __attribute__((__aligned__(SMP_CACHE_BYTES))); -#define VMXNET3_DEVICE_MAX_TX_QUEUES 8 -#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */ +#define VMXNET3_DEVICE_MAX_TX_QUEUES 32 +#define VMXNET3_DEVICE_MAX_RX_QUEUES 32 /* Keep this value as a power of 2 */ + +#define VMXNET3_DEVICE_DEFAULT_TX_QUEUES 8 +#define VMXNET3_DEVICE_DEFAULT_RX_QUEUES 8 /* Keep this value as a power of 2 */ /* Should be less than UPT1_RSS_MAX_IND_TABLE_SIZE */ #define VMXNET3_RSS_IND_TABLE_SIZE (VMXNET3_DEVICE_MAX_RX_QUEUES * 4) #define VMXNET3_LINUX_MAX_MSIX_VECT (VMXNET3_DEVICE_MAX_TX_QUEUES + \ VMXNET3_DEVICE_MAX_RX_QUEUES + 1) -#define VMXNET3_LINUX_MIN_MSIX_VECT 2 /* 1 for tx-rx pair and 1 for event */ +#define VMXNET3_LINUX_MIN_MSIX_VECT 3 /* 1 for tx, 1 for rx pair and 1 for event */ struct vmxnet3_intr { @@ -396,6 +401,7 @@ struct vmxnet3_adapter { dma_addr_t adapter_pa; dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; + bool queuesExtEnabled; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -421,6 +427,10 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_3 + 1) #define VMXNET3_VERSION_GE_4(adapter) \ (adapter->version >= VMXNET3_REV_4 + 1) +#define VMXNET3_VERSION_GE_5(adapter) \ + (adapter->version >= VMXNET3_REV_5 + 1) +#define VMXNET3_VERSION_GE_6(adapter) \ + (adapter->version >= VMXNET3_REV_6 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2b1b944d4b28..726adf07ef31 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; - int ret = -EINVAL; nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - ret = -ENOMEM; - goto err; + skb = skb_expand_head(skb, hh_len); + if (!skb) { + dev->stats.tx_errors++; + return -ENOMEM; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { + int ret; + sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); @@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s } rcu_read_unlock_bh(); -err: vrf_tx_error(skb->dev, skb); - return ret; + return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 473df2505c8e..592a8389fc5a 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -290,30 +290,6 @@ config SLIC_DS26522 To compile this driver as a module, choose M here: the module will be called slic_ds26522. -config DSCC4_PCISYNC - bool "Etinc PCISYNC features" - depends on DSCC4 - help - Due to Etinc's design choice for its PCISYNC cards, some operations - are only allowed on specific ports of the DSCC4. This option is the - only way for the driver to know that it shouldn't return a success - code for these operations. - - Please say Y if your card is an Etinc's PCISYNC. 
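The vrf conversion above works because skb_expand_head() reallocates headroom in place of the old skb_realloc_headroom()/skb_set_owner_w() dance and consumes the original skb on failure, returning NULL; the error path may therefore only touch dev, never skb (the hunk accordingly bumps dev->stats.tx_errors). A minimal sketch of that contract; names are illustrative:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_ensure_headroom(struct net_device *dev,
				   struct sk_buff **pskb, unsigned int hh_len)
{
	struct sk_buff *skb = *pskb;

	if (skb_headroom(skb) >= hh_len || !dev->header_ops)
		return 0;
	skb = skb_expand_head(skb, hh_len);	/* frees old skb on failure */
	if (!skb) {
		dev->stats.tx_errors++;		/* old skb is already gone */
		return -ENOMEM;
	}
	*pskb = skb;
	return 0;
}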
- -config DSCC4_PCI_RST - bool "Hard reset support" - depends on DSCC4 - help - Various DSCC4 bugs forbid any reliable software reset of the ASIC. - As a replacement, some vendors provide a way to assert the PCI #RST - pin of DSCC4 through the GPIO port of the card. If you choose Y, - the driver will make use of this feature before module removal - (i.e. rmmod). The feature is known to be available on Commtech's - cards. Contact your manufacturer for details. - - Say Y if your card supports this feature. - config IXP4XX_HSS tristate "Intel IXP4xx HSS (synchronous serial port) support" depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR @@ -337,33 +313,6 @@ config LAPBETHER To compile this driver as a module, choose M here: the module will be called lapbether. - If unsure, say N. - -config SBNI - tristate "Granch SBNI12 Leased Line adapter support" - depends on X86 - help - Driver for ISA SBNI12-xx cards which are low cost alternatives to - leased line modems. - - You can find more information and last versions of drivers and - utilities at <http://www.granch.ru/>. If you have any question you - can send email to <sbni@granch.ru>. - - To compile this driver as a module, choose M here: the - module will be called sbni. - - If unsure, say N. - -config SBNI_MULTILINE - bool "Multiple line feature support" - depends on SBNI - help - Schedule traffic for some parallel lines, via SBNI12 adapters. - - If you have two computers connected with two parallel lines it's - possible to increase transfer rate nearly twice. You should have - a program named 'sbniconfig' to configure adapters. If unsure, say N. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 081666c36ca2..f6b92efffc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC) += farsync.o obj-$(CONFIG_LANMEDIA) += lmc/ obj-$(CONFIG_LAPBETHER) += lapbether.o -obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o obj-$(CONFIG_WANXL) += wanxl.o diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c index 059c2f7133be..8dd14d916c3a 100644 --- a/drivers/net/wan/c101.c +++ b/drivers/net/wan/c101.c @@ -208,14 +208,12 @@ static int c101_close(struct net_device *dev) return 0; } -static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int c101_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - const size_t size = sizeof(sync_serial_settings); - sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; +#ifdef DEBUG_RINGS port_t *port = dev_to_port(dev); -#ifdef DEBUG_RINGS if (cmd == SIOCDEVPRIVATE) { sca_dump_rings(dev); printk(KERN_DEBUG "MSCI1: ST: %02x %02x %02x %02x\n", @@ -226,14 +224,22 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; } #endif - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - switch (ifr->ifr_settings.type) { + return -EOPNOTSUPP; +} + +static int c101_ioctl(struct net_device *dev, struct if_settings *ifs) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; + port_t *port = dev_to_port(dev); + + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_SYNC_SERIAL; + if (ifs->size < size) { + ifs->size = size; /* data size wanted 
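From here on, every WAN driver conversion follows the same recipe: the old catch-all ndo_do_ioctl splits into ndo_siocwandev, which receives the SIOCWANDEV payload as a struct if_settings pointer, and ndo_siocdevprivate, which receives device-private commands with the raw user pointer, so the repeated "cmd != SIOCWANDEV" guards disappear. A condensed sketch of the converted shape, assuming the callbacks as introduced by this series:

#include <linux/hdlc.h>
#include <linux/netdevice.h>

static int example_wandev(struct net_device *dev, struct if_settings *ifs)
{
	switch (ifs->type) {
	case IF_GET_IFACE:
		/* report interface type/settings here */
		return 0;
	default:
		return hdlc_ioctl(dev, ifs);	/* protocol fallback */
	}
}

static int example_private(struct net_device *dev, struct ifreq *ifr,
			   void __user *data, int cmd)
{
	return -EOPNOTSUPP;	/* no device-private commands */
}

static const struct net_device_ops example_ops = {
	.ndo_start_xmit     = hdlc_start_xmit,
	.ndo_siocwandev     = example_wandev,
	.ndo_siocdevprivate = example_private,
};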
*/ return -ENOBUFS; } if (copy_to_user(line, &port->settings, size)) @@ -261,7 +267,7 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -286,7 +292,8 @@ static const struct net_device_ops c101_ops = { .ndo_open = c101_open, .ndo_stop = c101_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = c101_ioctl, + .ndo_siocwandev = c101_ioctl, + .ndo_siocdevprivate = c101_siocdevprivate, }; static int __init c101_run(unsigned long irq, unsigned long winbase) diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 43caab0b7dee..23d2954d9747 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -267,7 +267,6 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d); static char *cosa_net_setup_rx(struct channel_data *channel, int size); static int cosa_net_rx_done(struct channel_data *channel); static int cosa_net_tx_done(struct channel_data *channel, int size); -static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); /* Character device */ static char *chrdev_setup_rx(struct channel_data *channel, int size); @@ -415,7 +414,7 @@ static const struct net_device_ops cosa_ops = { .ndo_open = cosa_net_open, .ndo_stop = cosa_net_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = cosa_net_ioctl, + .ndo_siocwandev = hdlc_ioctl, .ndo_tx_timeout = cosa_net_timeout, }; @@ -1169,18 +1168,6 @@ static int cosa_ioctl_common(struct cosa_data *cosa, return -ENOIOCTLCMD; } -static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - int rv; - struct channel_data *chan = dev_to_chan(dev); - - rv = cosa_ioctl_common(chan->cosa, chan, cmd, - (unsigned long)ifr->ifr_data); - if (rv != -ENOIOCTLCMD) - return rv; - return hdlc_ioctl(dev, ifr, cmd); -} - static long cosa_chardev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index b3466e084e84..6a212c085435 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -1784,16 +1784,15 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port, static int fst_set_iface(struct fst_card_info *card, struct fst_port_info *port, - struct ifreq *ifr) + struct if_settings *ifs) { sync_serial_settings sync; int i; - if (ifr->ifr_settings.size != sizeof(sync)) + if (ifs->size != sizeof(sync)) return -ENOMEM; - if (copy_from_user - (&sync, ifr->ifr_settings.ifs_ifsu.sync, sizeof(sync))) + if (copy_from_user(&sync, ifs->ifs_ifsu.sync, sizeof(sync))) return -EFAULT; if (sync.loopback) @@ -1801,7 +1800,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port, i = port->index; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_IFACE_V35: FST_WRW(card, portConfig[i].lineInterface, V35); port->hwif = V35; @@ -1857,7 +1856,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port, static int fst_get_iface(struct fst_card_info *card, struct fst_port_info *port, - struct ifreq *ifr) + struct if_settings *ifs) { sync_serial_settings sync; int i; @@ -1868,29 +1867,29 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port, */ switch (port->hwif) { case E1: - ifr->ifr_settings.type = IF_IFACE_E1; + ifs->type = IF_IFACE_E1; break; case T1: - ifr->ifr_settings.type = IF_IFACE_T1; + ifs->type = IF_IFACE_T1; break; case V35: - ifr->ifr_settings.type = IF_IFACE_V35; + ifs->type = IF_IFACE_V35; break; case 
V24: - ifr->ifr_settings.type = IF_IFACE_V24; + ifs->type = IF_IFACE_V24; break; case X21D: - ifr->ifr_settings.type = IF_IFACE_X21D; + ifs->type = IF_IFACE_X21D; break; case X21: default: - ifr->ifr_settings.type = IF_IFACE_X21; + ifs->type = IF_IFACE_X21; break; } - if (ifr->ifr_settings.size == 0) + if (!ifs->size) return 0; /* only type requested */ - if (ifr->ifr_settings.size < sizeof(sync)) + if (ifs->size < sizeof(sync)) return -ENOMEM; i = port->index; @@ -1901,15 +1900,15 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port, INTCLK ? CLOCK_INT : CLOCK_EXT; sync.loopback = 0; - if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof(sync))) + if (copy_to_user(ifs->ifs_ifsu.sync, &sync, sizeof(sync))) return -EFAULT; - ifr->ifr_settings.size = sizeof(sync); + ifs->size = sizeof(sync); return 0; } static int -fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +fst_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { struct fst_card_info *card; struct fst_port_info *port; @@ -1918,7 +1917,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) unsigned long flags; void *buf; - dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, ifr->ifr_data); + dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, data); port = dev_to_port(dev); card = port->card; @@ -1942,11 +1941,10 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* First copy in the header with the length and offset of data * to write */ - if (!ifr->ifr_data) + if (!data) return -EINVAL; - if (copy_from_user(&wrthdr, ifr->ifr_data, - sizeof(struct fstioc_write))) + if (copy_from_user(&wrthdr, data, sizeof(struct fstioc_write))) return -EFAULT; /* Sanity check the parameters. We don't support partial writes @@ -1958,7 +1956,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Now copy the data to the card. 
*/ - buf = memdup_user(ifr->ifr_data + sizeof(struct fstioc_write), + buf = memdup_user(data + sizeof(struct fstioc_write), wrthdr.size); if (IS_ERR(buf)) return PTR_ERR(buf); @@ -1991,12 +1989,12 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } - if (!ifr->ifr_data) + if (!data) return -EINVAL; gather_conf_info(card, port, &info); - if (copy_to_user(ifr->ifr_data, &info, sizeof(info))) + if (copy_to_user(data, &info, sizeof(info))) return -EFAULT; return 0; @@ -2011,46 +2009,58 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) card->card_no, card->state); return -EIO; } - if (copy_from_user(&info, ifr->ifr_data, sizeof(info))) + if (copy_from_user(&info, data, sizeof(info))) return -EFAULT; return set_conf_from_info(card, port, &info); + default: + return -EINVAL; + } +} - case SIOCWANDEV: - switch (ifr->ifr_settings.type) { - case IF_GET_IFACE: - return fst_get_iface(card, port, ifr); - - case IF_IFACE_SYNC_SERIAL: - case IF_IFACE_V35: - case IF_IFACE_V24: - case IF_IFACE_X21: - case IF_IFACE_X21D: - case IF_IFACE_T1: - case IF_IFACE_E1: - return fst_set_iface(card, port, ifr); - - case IF_PROTO_RAW: - port->mode = FST_RAW; - return 0; +static int +fst_ioctl(struct net_device *dev, struct if_settings *ifs) +{ + struct fst_card_info *card; + struct fst_port_info *port; - case IF_GET_PROTO: - if (port->mode == FST_RAW) { - ifr->ifr_settings.type = IF_PROTO_RAW; - return 0; - } - return hdlc_ioctl(dev, ifr, cmd); + dbg(DBG_IOCTL, "SIOCDEVPRIVATE, %x\n", ifs->type); - default: - port->mode = FST_GEN_HDLC; - dbg(DBG_IOCTL, "Passing this type to hdlc %x\n", - ifr->ifr_settings.type); - return hdlc_ioctl(dev, ifr, cmd); + port = dev_to_port(dev); + card = port->card; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (ifs->type) { + case IF_GET_IFACE: + return fst_get_iface(card, port, ifs); + + case IF_IFACE_SYNC_SERIAL: + case IF_IFACE_V35: + case IF_IFACE_V24: + case IF_IFACE_X21: + case IF_IFACE_X21D: + case IF_IFACE_T1: + case IF_IFACE_E1: + return fst_set_iface(card, port, ifs); + + case IF_PROTO_RAW: + port->mode = FST_RAW; + return 0; + + case IF_GET_PROTO: + if (port->mode == FST_RAW) { + ifs->type = IF_PROTO_RAW; + return 0; } + return hdlc_ioctl(dev, ifs); default: - /* Not one of ours. 
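The farsync write path above keeps a pattern worth noting: copy the fixed-size request header first, sanity-check it, then memdup_user() only the declared payload. A sketch with hypothetical types (the real fstioc_write carries more fields):

#include <linux/err.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_wr_hdr {
	u32 size;
	u32 offset;
};

static int example_copy_write_req(void __user *data, void **payload, u32 max)
{
	struct example_wr_hdr hdr;
	void *buf;

	if (copy_from_user(&hdr, data, sizeof(hdr)))
		return -EFAULT;
	if (hdr.size == 0 || hdr.size > max)	/* validate before copying */
		return -EINVAL;
	buf = memdup_user((u8 __user *)data + sizeof(hdr), hdr.size);
	if (IS_ERR(buf))
		return PTR_ERR(buf);
	*payload = buf;		/* caller kfree()s when done */
	return 0;
}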
Pass through to HDLC package */ - return hdlc_ioctl(dev, ifr, cmd); + port->mode = FST_GEN_HDLC; + dbg(DBG_IOCTL, "Passing this type to hdlc %x\n", + ifs->type); + return hdlc_ioctl(dev, ifs); } } @@ -2310,7 +2320,8 @@ static const struct net_device_ops fst_ops = { .ndo_open = fst_open, .ndo_stop = fst_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = fst_ioctl, + .ndo_siocwandev = fst_ioctl, + .ndo_siocdevprivate = fst_siocdevprivate, .ndo_tx_timeout = fst_tx_timeout, }; diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 39f05fabbfa4..cda1b4ce6b21 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -674,31 +674,28 @@ static irqreturn_t ucc_hdlc_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static int uhdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int uhdlc_ioctl(struct net_device *dev, struct if_settings *ifs) { const size_t size = sizeof(te1_settings); te1_settings line; struct ucc_hdlc_private *priv = netdev_priv(dev); - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_E1; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_E1; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } memset(&line, 0, sizeof(line)); line.clock_type = priv->clocking; - if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size)) + if (copy_to_user(ifs->ifs_ifsu.sync, &line, size)) return -EFAULT; return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -1053,7 +1050,7 @@ static const struct net_device_ops uhdlc_ops = { .ndo_open = uhdlc_open, .ndo_stop = uhdlc_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = uhdlc_ioctl, + .ndo_siocwandev = uhdlc_ioctl, .ndo_tx_timeout = uhdlc_tx_timeout, }; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index dd6312b69861..cbed10b1d862 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -196,16 +196,13 @@ void hdlc_close(struct net_device *dev) } EXPORT_SYMBOL(hdlc_close); -int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs) { struct hdlc_proto *proto = first_proto; int result; - if (cmd != SIOCWANDEV) - return -EINVAL; - if (dev_to_hdlc(dev)->proto) { - result = dev_to_hdlc(dev)->proto->ioctl(dev, ifr); + result = dev_to_hdlc(dev)->proto->ioctl(dev, ifs); if (result != -EINVAL) return result; } @@ -213,7 +210,7 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /* Not handled by currently attached protocol (if any) */ while (proto) { - result = proto->ioctl(dev, ifr); + result = proto->ioctl(dev, ifs); if (result != -EINVAL) return result; proto = proto->next; diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index c54fdae950fb..cdebe65a7e2d 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -56,7 +56,7 @@ struct cisco_state { u32 rxseq; /* RX sequence number */ }; -static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr); +static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs); static inline struct cisco_state *state(hdlc_device *hdlc) { @@ -306,21 +306,21 @@ static const struct header_ops cisco_header_ops = { .create = cisco_hard_header, }; -static int cisco_ioctl(struct net_device *dev, 
struct ifreq *ifr) +static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs) { - cisco_proto __user *cisco_s = ifr->ifr_settings.ifs_ifsu.cisco; + cisco_proto __user *cisco_s = ifs->ifs_ifsu.cisco; const size_t size = sizeof(cisco_proto); cisco_proto new_settings; hdlc_device *hdlc = dev_to_hdlc(dev); int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_CISCO; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_PROTO_CISCO; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(cisco_s, &state(hdlc)->settings, size)) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 25e3564ce118..7637edce443e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -146,7 +146,7 @@ struct frad_state { u8 rxseq; /* RX sequence number */ }; -static int fr_ioctl(struct net_device *dev, struct ifreq *ifr); +static int fr_ioctl(struct net_device *dev, struct if_settings *ifs); static inline u16 q922_to_dlci(u8 *hdr) { @@ -357,26 +357,26 @@ static int pvc_close(struct net_device *dev) return 0; } -static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int pvc_ioctl(struct net_device *dev, struct if_settings *ifs) { struct pvc_device *pvc = dev->ml_priv; fr_proto_pvc_info info; - if (ifr->ifr_settings.type == IF_GET_PROTO) { + if (ifs->type == IF_GET_PROTO) { if (dev->type == ARPHRD_ETHER) - ifr->ifr_settings.type = IF_PROTO_FR_ETH_PVC; + ifs->type = IF_PROTO_FR_ETH_PVC; else - ifr->ifr_settings.type = IF_PROTO_FR_PVC; + ifs->type = IF_PROTO_FR_PVC; - if (ifr->ifr_settings.size < sizeof(info)) { + if (ifs->size < sizeof(info)) { /* data size wanted */ - ifr->ifr_settings.size = sizeof(info); + ifs->size = sizeof(info); return -ENOBUFS; } info.dlci = pvc->dlci; memcpy(info.master, pvc->frad->name, IFNAMSIZ); - if (copy_to_user(ifr->ifr_settings.ifs_ifsu.fr_pvc_info, + if (copy_to_user(ifs->ifs_ifsu.fr_pvc_info, &info, sizeof(info))) return -EFAULT; return 0; @@ -1056,7 +1056,7 @@ static const struct net_device_ops pvc_ops = { .ndo_open = pvc_open, .ndo_stop = pvc_close, .ndo_start_xmit = pvc_xmit, - .ndo_do_ioctl = pvc_ioctl, + .ndo_siocwandev = pvc_ioctl, }; static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) @@ -1179,22 +1179,22 @@ static struct hdlc_proto proto = { .module = THIS_MODULE, }; -static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) +static int fr_ioctl(struct net_device *dev, struct if_settings *ifs) { - fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr; + fr_proto __user *fr_s = ifs->ifs_ifsu.fr; const size_t size = sizeof(fr_proto); fr_proto new_settings; hdlc_device *hdlc = dev_to_hdlc(dev); fr_proto_pvc pvc; int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */ return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_FR; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_PROTO_FR; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(fr_s, &state(hdlc)->settings, size)) @@ -1256,21 +1256,21 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&pvc, 
ifr->ifr_settings.ifs_ifsu.fr_pvc, + if (copy_from_user(&pvc, ifs->ifs_ifsu.fr_pvc, sizeof(fr_proto_pvc))) return -EFAULT; if (pvc.dlci <= 0 || pvc.dlci >= 1024) return -EINVAL; /* Only 10 bits, DLCI 0 reserved */ - if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC || - ifr->ifr_settings.type == IF_PROTO_FR_DEL_ETH_PVC) + if (ifs->type == IF_PROTO_FR_ADD_ETH_PVC || + ifs->type == IF_PROTO_FR_DEL_ETH_PVC) result = ARPHRD_ETHER; /* bridged Ethernet device */ else result = ARPHRD_DLCI; - if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_PVC || - ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC) + if (ifs->type == IF_PROTO_FR_ADD_PVC || + ifs->type == IF_PROTO_FR_ADD_ETH_PVC) return fr_add_pvc(dev, pvc.dlci, result); else return fr_del_pvc(hdlc, pvc.dlci, result); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index b81ecf432a0c..37a3c989cba1 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -100,7 +100,7 @@ static const char *const event_names[EVENTS] = { static struct sk_buff_head tx_queue; /* used when holding the spin lock */ -static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr); +static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs); static inline struct ppp *get_ppp(struct net_device *dev) { @@ -655,17 +655,17 @@ static const struct header_ops ppp_header_ops = { .create = ppp_hard_header, }; -static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr) +static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs) { hdlc_device *hdlc = dev_to_hdlc(dev); struct ppp *ppp; int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_PPP; + ifs->type = IF_PROTO_PPP; return 0; /* return protocol only, no settable parameters */ case IF_PROTO_PPP: diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index 54d28496fefd..4a2f068721bc 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -19,7 +19,7 @@ #include <linux/skbuff.h> -static int raw_ioctl(struct net_device *dev, struct ifreq *ifr); +static int raw_ioctl(struct net_device *dev, struct if_settings *ifs); static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev) { @@ -33,21 +33,21 @@ static struct hdlc_proto proto = { }; -static int raw_ioctl(struct net_device *dev, struct ifreq *ifr) +static int raw_ioctl(struct net_device *dev, struct if_settings *ifs) { - raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc; + raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc; const size_t size = sizeof(raw_hdlc_proto); raw_hdlc_proto new_settings; hdlc_device *hdlc = dev_to_hdlc(dev); int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_HDLC; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_PROTO_HDLC; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(raw_s, hdlc->state, size)) diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index 927596276a07..0a66b7356405 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -20,7 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> -static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr); +static int raw_eth_ioctl(struct 
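All of these protocol handlers share the two-step IF_GET_* convention: if ifs->size is too small (userspace commonly probes with 0), write back the required size and return -ENOBUFS; otherwise copy the settings out. A condensed sketch; the helper and the generic state pointer are illustrative:

#include <linux/if.h>
#include <linux/uaccess.h>

static int example_get_settings(struct if_settings *ifs, const void *state,
				size_t size)
{
	if (ifs->size < size) {
		ifs->size = size;	/* tell userspace what to allocate */
		return -ENOBUFS;
	}
	if (copy_to_user(ifs->ifs_ifsu.sync, state, size))
		return -EFAULT;
	return 0;
}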
net_device *dev, struct if_settings *ifs); static netdev_tx_t eth_tx(struct sk_buff *skb, struct net_device *dev) { @@ -48,22 +48,22 @@ static struct hdlc_proto proto = { }; -static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) +static int raw_eth_ioctl(struct net_device *dev, struct if_settings *ifs) { - raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc; + raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc; const size_t size = sizeof(raw_hdlc_proto); raw_hdlc_proto new_settings; hdlc_device *hdlc = dev_to_hdlc(dev); unsigned int old_qlen; int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_HDLC_ETH; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_PROTO_HDLC_ETH; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(raw_s, hdlc->state, size)) diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index 9b7ebf8bd85c..f72c92c24003 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -29,7 +29,7 @@ struct x25_state { struct tasklet_struct rx_tasklet; }; -static int x25_ioctl(struct net_device *dev, struct ifreq *ifr); +static int x25_ioctl(struct net_device *dev, struct if_settings *ifs); static struct x25_state *state(hdlc_device *hdlc) { @@ -274,21 +274,21 @@ static struct hdlc_proto proto = { .module = THIS_MODULE, }; -static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) +static int x25_ioctl(struct net_device *dev, struct if_settings *ifs) { - x25_hdlc_proto __user *x25_s = ifr->ifr_settings.ifs_ifsu.x25; + x25_hdlc_proto __user *x25_s = ifs->ifs_ifsu.x25; const size_t size = sizeof(x25_hdlc_proto); hdlc_device *hdlc = dev_to_hdlc(dev); x25_hdlc_proto new_settings; int result; - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_PROTO: if (dev_to_hdlc(dev)->proto != &proto) return -EINVAL; - ifr->ifr_settings.type = IF_PROTO_X25; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_PROTO_X25; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(x25_s, &state(hdlc)->settings, size)) @@ -303,7 +303,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) return -EBUSY; /* backward compatibility */ - if (ifr->ifr_settings.size == 0) { + if (ifs->size == 0) { new_settings.dce = 0; new_settings.modulo = 8; new_settings.window = 7; diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index fd61a7cc4fdf..e985e54ba75d 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -142,11 +142,6 @@ static int hostess_close(struct net_device *d) return 0; } -static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd) -{ - return hdlc_ioctl(d, ifr, cmd); -} - /* Passed network frames, fire them downwind. 
*/ @@ -171,7 +166,7 @@ static const struct net_device_ops hostess_ops = { .ndo_open = hostess_open, .ndo_stop = hostess_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = hostess_ioctl, + .ndo_siocwandev = hdlc_ioctl, }; static struct z8530_dev *sv11_init(int iobase, int irq) @@ -324,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11"); static struct z8530_dev *sv11_unit; -int init_module(void) +static int sv11_module_init(void) { sv11_unit = sv11_init(io, irq); if (!sv11_unit) return -ENODEV; return 0; } +module_init(sv11_module_init); -void cleanup_module(void) +static void sv11_module_cleanup(void) { if (sv11_unit) sv11_shutdown(sv11_unit); } +module_exit(sv11_module_cleanup); diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 3c51ab239fb2..88a36a069311 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -975,11 +975,10 @@ static int init_hdlc_queues(struct port *port) return -ENOMEM; } - port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL, + port->desc_tab = dma_pool_zalloc(dma_pool, GFP_KERNEL, &port->desc_tab_phys); if (!port->desc_tab) return -ENOMEM; - memset(port->desc_tab, 0, POOL_ALLOC_SIZE); memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */ memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab)); @@ -1255,23 +1254,20 @@ static void find_best_clock(u32 timer_freq, u32 rate, u32 *best, u32 *reg) } } -static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int hss_hdlc_ioctl(struct net_device *dev, struct if_settings *ifs) { const size_t size = sizeof(sync_serial_settings); sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; struct port *port = dev_to_port(dev); unsigned long flags; int clk; - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_V35; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_V35; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } memset(&new_line, 0, sizeof(new_line)); @@ -1324,7 +1320,7 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -1336,7 +1332,7 @@ static const struct net_device_ops hss_hdlc_ops = { .ndo_open = hss_hdlc_open, .ndo_stop = hss_hdlc_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = hss_hdlc_ioctl, + .ndo_siocwandev = hss_hdlc_ioctl, }; static int hss_init_one(struct platform_device *pdev) diff --git a/drivers/net/wan/lmc/lmc.h b/drivers/net/wan/lmc/lmc.h index 3bd541c868d5..d7d59b4595f9 100644 --- a/drivers/net/wan/lmc/lmc.h +++ b/drivers/net/wan/lmc/lmc.h @@ -19,7 +19,7 @@ void lmc_mii_writereg(lmc_softc_t * const, unsigned, unsigned, unsigned); void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits); void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits); -int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int lmc_ioctl(struct net_device *dev, struct if_settings *ifs); extern lmc_media_t lmc_ds3_media; extern lmc_media_t lmc_ssi_media; diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 6c163db52835..ed687bf6ec47 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ 
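Two smaller cleanups land above: hostess_sv11 drops the legacy init_module()/cleanup_module() entry points in favour of static functions registered via module_init()/module_exit(), and ixp4xx_hss replaces dma_pool_alloc() plus memset() with dma_pool_zalloc(). For reference, the modern module idiom looks like this (a generic sketch, not the driver's code):

#include <linux/module.h>

static int __init example_init(void)
{
	return 0;	/* registration would go here */
}
module_init(example_init);

static void __exit example_exit(void)
{
	/* teardown mirrors example_init() */
}
module_exit(example_exit);

MODULE_LICENSE("GPL");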
b/drivers/net/wan/lmc/lmc_main.c @@ -105,7 +105,8 @@ static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue); * linux reserves 16 device specific IOCTLs. We call them * LMCIOC* to control various bits of our world. */ -int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ +static int lmc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) /*fold00*/ { lmc_softc_t *sc = dev_to_sc(dev); lmc_ctl_t ctl; @@ -124,7 +125,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ * To date internally, just copy this out to the user. */ case LMCIOCGINFO: /*fold01*/ - if (copy_to_user(ifr->ifr_data, &sc->ictl, sizeof(lmc_ctl_t))) + if (copy_to_user(data, &sc->ictl, sizeof(lmc_ctl_t))) ret = -EFAULT; else ret = 0; @@ -141,7 +142,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; } - if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) { + if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) { ret = -EFAULT; break; } @@ -171,7 +172,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; } - if (copy_from_user(&new_type, ifr->ifr_data, sizeof(u16))) { + if (copy_from_user(&new_type, data, sizeof(u16))) { ret = -EFAULT; break; } @@ -211,8 +212,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ sc->lmc_xinfo.Magic1 = 0xDEADBEEF; - if (copy_to_user(ifr->ifr_data, &sc->lmc_xinfo, - sizeof(struct lmc_xinfo))) + if (copy_to_user(data, &sc->lmc_xinfo, sizeof(struct lmc_xinfo))) ret = -EFAULT; else ret = 0; @@ -245,9 +245,9 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ regVal & T1FRAMER_SEF_MASK; } spin_unlock_irqrestore(&sc->lmc_lock, flags); - if (copy_to_user(ifr->ifr_data, &sc->lmc_device->stats, + if (copy_to_user(data, &sc->lmc_device->stats, sizeof(sc->lmc_device->stats)) || - copy_to_user(ifr->ifr_data + sizeof(sc->lmc_device->stats), + copy_to_user(data + sizeof(sc->lmc_device->stats), &sc->extra_stats, sizeof(sc->extra_stats))) ret = -EFAULT; else @@ -282,7 +282,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; } - if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) { + if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) { ret = -EFAULT; break; } @@ -314,11 +314,11 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ #ifdef DEBUG case LMCIOCDUMPEVENTLOG: - if (copy_to_user(ifr->ifr_data, &lmcEventLogIndex, sizeof(u32))) { + if (copy_to_user(data, &lmcEventLogIndex, sizeof(u32))) { ret = -EFAULT; break; } - if (copy_to_user(ifr->ifr_data + sizeof(u32), lmcEventLogBuf, + if (copy_to_user(data + sizeof(u32), lmcEventLogBuf, sizeof(lmcEventLogBuf))) ret = -EFAULT; else @@ -346,7 +346,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ */ netif_stop_queue(dev); - if (copy_from_user(&xc, ifr->ifr_data, sizeof(struct lmc_xilinx_control))) { + if (copy_from_user(&xc, data, sizeof(struct lmc_xilinx_control))) { ret = -EFAULT; break; } @@ -609,10 +609,8 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ } break; - default: /*fold01*/ - /* If we don't know what to do, give the protocol a shot. 
*/ - ret = lmc_proto_ioctl (sc, ifr, cmd); - break; + default: + break; } return ret; @@ -788,7 +786,8 @@ static const struct net_device_ops lmc_ops = { .ndo_open = lmc_open, .ndo_stop = lmc_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = lmc_ioctl, + .ndo_siocwandev = hdlc_ioctl, + .ndo_siocdevprivate = lmc_siocdevprivate, .ndo_tx_timeout = lmc_driver_timeout, .ndo_get_stats = lmc_get_stats, }; diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c index 4e9cc83b615a..e5487616a816 100644 --- a/drivers/net/wan/lmc/lmc_proto.c +++ b/drivers/net/wan/lmc/lmc_proto.c @@ -58,13 +58,6 @@ void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/ } } -int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd) -{ - if (sc->if_type == LMC_PPP) - return hdlc_ioctl(sc->lmc_device, ifr, cmd); - return -EOPNOTSUPP; -} - int lmc_proto_open(lmc_softc_t *sc) { int ret = 0; diff --git a/drivers/net/wan/lmc/lmc_proto.h b/drivers/net/wan/lmc/lmc_proto.h index bb098e443776..e56e7072de44 100644 --- a/drivers/net/wan/lmc/lmc_proto.h +++ b/drivers/net/wan/lmc/lmc_proto.h @@ -5,7 +5,6 @@ #include <linux/hdlc.h> void lmc_proto_attach(lmc_softc_t *sc); -int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd); int lmc_proto_open(lmc_softc_t *sc); void lmc_proto_close(lmc_softc_t *sc); __be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb); diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c index bdb6dc2409bc..f3e80722ba1d 100644 --- a/drivers/net/wan/n2.c +++ b/drivers/net/wan/n2.c @@ -227,27 +227,30 @@ static int n2_close(struct net_device *dev) return 0; } -static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int n2_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - const size_t size = sizeof(sync_serial_settings); - sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; - port_t *port = dev_to_port(dev); - #ifdef DEBUG_RINGS if (cmd == SIOCDEVPRIVATE) { sca_dump_rings(dev); return 0; } #endif - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; +} + +static int n2_ioctl(struct net_device *dev, struct if_settings *ifs) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; + port_t *port = dev_to_port(dev); - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_SYNC_SERIAL; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(line, &port->settings, size)) @@ -275,7 +278,7 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -311,7 +314,8 @@ static const struct net_device_ops n2_ops = { .ndo_open = n2_open, .ndo_stop = n2_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = n2_ioctl, + .ndo_siocwandev = n2_ioctl, + .ndo_siocdevprivate = n2_siocdevprivate, }; static int __init n2_run(unsigned long io, unsigned long irq, diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c index 7b123a771aa6..4766446f0fa0 100644 --- a/drivers/net/wan/pc300too.c +++ b/drivers/net/wan/pc300too.c @@ -174,27 +174,30 @@ static int pc300_close(struct net_device *dev) return 0; } 
-static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int pc300_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - const size_t size = sizeof(sync_serial_settings); - sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; - int new_type; - port_t *port = dev_to_port(dev); - #ifdef DEBUG_RINGS if (cmd == SIOCDEVPRIVATE) { sca_dump_rings(dev); return 0; } #endif - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; +} + +static int pc300_ioctl(struct net_device *dev, struct if_settings *ifs) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; + int new_type; + port_t *port = dev_to_port(dev); - if (ifr->ifr_settings.type == IF_GET_IFACE) { - ifr->ifr_settings.type = port->iface; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + if (ifs->type == IF_GET_IFACE) { + ifs->type = port->iface; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(line, &port->settings, size)) @@ -203,21 +206,21 @@ static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } if (port->card->type == PC300_X21 && - (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL || - ifr->ifr_settings.type == IF_IFACE_X21)) + (ifs->type == IF_IFACE_SYNC_SERIAL || + ifs->type == IF_IFACE_X21)) new_type = IF_IFACE_X21; else if (port->card->type == PC300_RSV && - (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL || - ifr->ifr_settings.type == IF_IFACE_V35)) + (ifs->type == IF_IFACE_SYNC_SERIAL || + ifs->type == IF_IFACE_V35)) new_type = IF_IFACE_V35; else if (port->card->type == PC300_RSV && - ifr->ifr_settings.type == IF_IFACE_V24) + ifs->type == IF_IFACE_V24) new_type = IF_IFACE_V24; else - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -272,7 +275,8 @@ static const struct net_device_ops pc300_ops = { .ndo_open = pc300_open, .ndo_stop = pc300_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = pc300_ioctl, + .ndo_siocwandev = pc300_ioctl, + .ndo_siocdevprivate = pc300_siocdevprivate, }; static int pc300_pci_init_one(struct pci_dev *pdev, diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c index dee9c4e15eca..ea86c7035653 100644 --- a/drivers/net/wan/pci200syn.c +++ b/drivers/net/wan/pci200syn.c @@ -167,27 +167,30 @@ static int pci200_close(struct net_device *dev) return 0; } -static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int pci200_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { - const size_t size = sizeof(sync_serial_settings); - sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; - port_t *port = dev_to_port(dev); - #ifdef DEBUG_RINGS if (cmd == SIOCDEVPRIVATE) { sca_dump_rings(dev); return 0; } #endif - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; +} + +static int pci200_ioctl(struct net_device *dev, struct if_settings *ifs) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; + port_t *port = dev_to_port(dev); - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_V35; - if 
(ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_V35; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } if (copy_to_user(line, &port->settings, size)) @@ -217,7 +220,7 @@ static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -253,7 +256,8 @@ static const struct net_device_ops pci200_ops = { .ndo_open = pci200_open, .ndo_stop = pci200_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = pci200_ioctl, + .ndo_siocwandev = pci200_ioctl, + .ndo_siocdevprivate = pci200_siocdevprivate, }; static int pci200_pci_init_one(struct pci_dev *pdev, diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c deleted file mode 100644 index 3092a09d3eaa..000000000000 --- a/drivers/net/wan/sbni.c +++ /dev/null @@ -1,1638 +0,0 @@ -/* sbni.c: Granch SBNI12 leased line adapters driver for linux - * - * Written 2001 by Denis I.Timofeev (timofeev@granch.ru) - * - * Previous versions were written by Yaroslav Polyakov, - * Alexey Zverev and Max Khon. - * - * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and - * double-channel, PCI and ISA modifications. - * More info and useful utilities to work with SBNI12 cards you can find - * at http://www.granch.com (English) or http://www.granch.ru (Russian) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License. - * - * - * 5.0.1 Jun 22 2001 - * - Fixed bug in probe - * 5.0.0 Jun 06 2001 - * - Driver was completely redesigned by Denis I.Timofeev, - * - now PCI/Dual, ISA/Dual (with single interrupt line) models are - * - supported - * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000 - * - PCI cards support - * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999 - * - Completely rebuilt all the packet storage system - * - to work in Ethernet-like style. - * 3.1.1 just fixed some bugs (5 aug 1999) - * 3.1.0 added balancing feature (26 apr 1999) - * 3.0.1 just fixed some bugs (14 apr 1999). - * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999) - * - added pre-calculation for CRC, fixed bug with "len-2" frames, - * - removed outbound fragmentation (MTU=1000), written CRC-calculation - * - on asm, added work with hard_headers and now we have our own cache - * - for them, optionally supported word-interchange on some chipsets, - * - * Known problem: this driver wasn't tested on multiprocessor machine. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ptrace.h> -#include <linux/fcntl.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/pci.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/delay.h> - -#include <net/net_namespace.h> -#include <net/arp.h> -#include <net/Space.h> - -#include <asm/io.h> -#include <asm/types.h> -#include <asm/byteorder.h> -#include <asm/irq.h> -#include <linux/uaccess.h> - -#include "sbni.h" - -/* device private data */ - -struct net_local { - struct timer_list watchdog; - struct net_device *watchdog_dev; - - spinlock_t lock; - struct sk_buff *rx_buf_p; /* receive buffer ptr */ - struct sk_buff *tx_buf_p; /* transmit buffer ptr */ - - unsigned int framelen; /* current frame length */ - unsigned int maxframe; /* maximum valid frame length */ - unsigned int state; - unsigned int inppos, outpos; /* positions in rx/tx buffers */ - - /* transmitting frame number - from frames qty to 1 */ - unsigned int tx_frameno; - - /* expected number of next receiving frame */ - unsigned int wait_frameno; - - /* count of failed attempts to frame send - 32 attempts do before - error - while receiver tunes on opposite side of wire */ - unsigned int trans_errors; - - /* idle time; send pong when limit exceeded */ - unsigned int timer_ticks; - - /* fields used for receive level autoselection */ - int delta_rxl; - unsigned int cur_rxl_index, timeout_rxl; - unsigned long cur_rxl_rcvd, prev_rxl_rcvd; - - struct sbni_csr1 csr1; /* current value of CSR1 */ - struct sbni_in_stats in_stats; /* internal statistics */ - - struct net_device *second; /* for ISA/dual cards */ - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *master; - struct net_device *link; -#endif -}; - - -static int sbni_card_probe( unsigned long ); -static int sbni_pci_probe( struct net_device * ); -static struct net_device *sbni_probe1(struct net_device *, unsigned long, int); -static int sbni_open( struct net_device * ); -static int sbni_close( struct net_device * ); -static netdev_tx_t sbni_start_xmit(struct sk_buff *, - struct net_device * ); -static int sbni_ioctl( struct net_device *, struct ifreq *, int ); -static void set_multicast_list( struct net_device * ); - -static irqreturn_t sbni_interrupt( int, void * ); -static void handle_channel( struct net_device * ); -static int recv_frame( struct net_device * ); -static void send_frame( struct net_device * ); -static int upload_data( struct net_device *, - unsigned, unsigned, unsigned, u32 ); -static void download_data( struct net_device *, u32 * ); -static void sbni_watchdog(struct timer_list *); -static void interpret_ack( struct net_device *, unsigned ); -static int append_frame_to_pkt( struct net_device *, unsigned, u32 ); -static void indicate_pkt( struct net_device * ); -static void card_start( struct net_device * ); -static void prepare_to_send( struct sk_buff *, struct net_device * ); -static void drop_xmit_queue( struct net_device * ); -static void send_frame_header( struct net_device *, u32 * ); -static int skip_tail( unsigned int, unsigned int, u32 ); -static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * ); -static void change_level( struct net_device * ); -static void timeout_change_level( struct net_device * ); -static u32 calc_crc32( u32, u8 *, u32 ); -static struct sk_buff * 
get_rx_buf( struct net_device * ); -static int sbni_init( struct net_device * ); - -#ifdef CONFIG_SBNI_MULTILINE -static int enslave( struct net_device *, struct net_device * ); -static int emancipate( struct net_device * ); -#endif - -static const char version[] = - "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; - -static bool skip_pci_probe __initdata = false; -static int scandone __initdata = 0; -static int num __initdata = 0; - -static unsigned char rxl_tab[]; -static u32 crc32tab[]; - -/* A list of all installed devices, for removing the driver module. */ -static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ]; - -/* Lists of device's parameters */ -static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata; - -#ifndef MODULE -typedef u32 iarr[]; -static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac }; -#endif - -/* A zero-terminated list of I/O addresses to be probed on ISA bus */ -static unsigned int netcard_portlist[ ] __initdata = { - 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254, - 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4, - 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4, - 0 }; - -#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock) - -/* - * Look for SBNI card which addr stored in dev->base_addr, if nonzero. - * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA. - */ - -static inline int __init -sbni_isa_probe( struct net_device *dev ) -{ - if( dev->base_addr > 0x1ff && - request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, dev->base_addr, dev->irq ) ) - - return 0; - else { - pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n", - dev->base_addr); - return -ENODEV; - } -} - -static const struct net_device_ops sbni_netdev_ops = { - .ndo_open = sbni_open, - .ndo_stop = sbni_close, - .ndo_start_xmit = sbni_start_xmit, - .ndo_set_rx_mode = set_multicast_list, - .ndo_do_ioctl = sbni_ioctl, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static void __init sbni_devsetup(struct net_device *dev) -{ - ether_setup( dev ); - dev->netdev_ops = &sbni_netdev_ops; -} - -int __init sbni_probe(int unit) -{ - struct net_device *dev; - int err; - - dev = alloc_netdev(sizeof(struct net_local), "sbni", - NET_NAME_UNKNOWN, sbni_devsetup); - if (!dev) - return -ENOMEM; - - dev->netdev_ops = &sbni_netdev_ops; - - sprintf(dev->name, "sbni%d", unit); - netdev_boot_setup_check(dev); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - return err; - } - - err = register_netdev(dev); - if (err) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev(dev); - return err; - } - pr_info_once("%s", version); - return 0; -} - -static int __init sbni_init(struct net_device *dev) -{ - int i; - if( dev->base_addr ) - return sbni_isa_probe( dev ); - /* otherwise we have to perform search our adapter */ - - if( io[ num ] != -1 ) { - dev->base_addr = io[ num ]; - dev->irq = irq[ num ]; - } else if( scandone || io[ 0 ] != -1 ) { - return -ENODEV; - } - - /* if io[ num ] contains non-zero address, then that is on ISA bus */ - if( dev->base_addr ) - return sbni_isa_probe( dev ); - - /* ...otherwise - 
scan PCI first */ - if( !skip_pci_probe && !sbni_pci_probe( dev ) ) - return 0; - - if( io[ num ] == -1 ) { - /* Auto-scan will be stopped when first ISA card were found */ - scandone = 1; - if( num > 0 ) - return -ENODEV; - } - - for( i = 0; netcard_portlist[ i ]; ++i ) { - int ioaddr = netcard_portlist[ i ]; - if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, ioaddr, 0 )) - return 0; - } - - return -ENODEV; -} - - -static int __init -sbni_pci_probe( struct net_device *dev ) -{ - struct pci_dev *pdev = NULL; - - while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev )) - != NULL ) { - int pci_irq_line; - unsigned long pci_ioaddr; - - if( pdev->vendor != SBNI_PCI_VENDOR && - pdev->device != SBNI_PCI_DEVICE ) - continue; - - pci_ioaddr = pci_resource_start( pdev, 0 ); - pci_irq_line = pdev->irq; - - /* Avoid already found cards from previous calls */ - if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) { - if (pdev->subsystem_device != 2) - continue; - - /* Dual adapter is present */ - if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT, - dev->name ) ) - continue; - } - - if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs) - pr_warn( -"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n" -"You should use the PCI BIOS setup to assign a valid IRQ line.\n", - pci_irq_line ); - - /* avoiding re-enable dual adapters */ - if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) { - release_region( pci_ioaddr, SBNI_IO_EXTENT ); - pci_dev_put( pdev ); - return -EIO; - } - if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) { - SET_NETDEV_DEV(dev, &pdev->dev); - /* not the best thing to do, but this is all messed up - for hotplug systems anyway... */ - pci_dev_put( pdev ); - return 0; - } - } - return -ENODEV; -} - - -static struct net_device * __init -sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq ) -{ - struct net_local *nl; - - if( sbni_card_probe( ioaddr ) ) { - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - outb( 0, ioaddr + CSR0 ); - - if( irq < 2 ) { - unsigned long irq_mask; - - irq_mask = probe_irq_on(); - outb( EN_INT | TR_REQ, ioaddr + CSR0 ); - outb( PR_RES, ioaddr + CSR1 ); - mdelay(50); - irq = probe_irq_off(irq_mask); - outb( 0, ioaddr + CSR0 ); - - if( !irq ) { - pr_err("%s: can't detect device irq!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - } else if( irq == 2 ) - irq = 9; - - dev->irq = irq; - dev->base_addr = ioaddr; - - /* Fill in sbni-specific dev fields. */ - nl = netdev_priv(dev); - if( !nl ) { - pr_err("%s: unable to get memory!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - memset( nl, 0, sizeof(struct net_local) ); - spin_lock_init( &nl->lock ); - - /* store MAC address (generate if that isn't known) */ - *(__be16 *)dev->dev_addr = htons( 0x00ff ); - *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 | - ((mac[num] ? 
- mac[num] : - (u32)((long)netdev_priv(dev))) & 0x00ffffff)); - - /* store link settings (speed, receive level ) */ - nl->maxframe = DEFAULT_FRAME_LEN; - nl->csr1.rate = baud[ num ]; - - if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) { - /* autotune rxl */ - nl->cur_rxl_index = DEF_RXL; - nl->delta_rxl = DEF_RXL_DELTA; - } else { - nl->delta_rxl = 0; - } - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - if( inb( ioaddr + CSR0 ) & 0x01 ) - nl->state |= FL_SLOW_MODE; - - pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n", - dev->name, dev->base_addr, dev->irq, - ((u8 *)dev->dev_addr)[3], - ((u8 *)dev->dev_addr)[4], - ((u8 *)dev->dev_addr)[5]); - - pr_notice("%s: speed %d", - dev->name, - ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000) - / (1 << nl->csr1.rate)); - - if( nl->delta_rxl == 0 ) - pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index); - else - pr_cont(", receive level (auto)\n"); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master = dev; - nl->link = NULL; -#endif - - sbni_cards[ num++ ] = dev; - return dev; -} - -/* -------------------------------------------------------------------------- */ - -#ifdef CONFIG_SBNI_MULTILINE - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_device *p; - - netif_stop_queue( dev ); - - /* Looking for idle device in the list */ - for( p = dev; p; ) { - struct net_local *nl = netdev_priv(p); - spin_lock( &nl->lock ); - if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) { - p = nl->link; - spin_unlock( &nl->lock ); - } else { - /* Idle dev is found */ - prepare_to_send( skb, p ); - spin_unlock( &nl->lock ); - netif_start_queue( dev ); - return NETDEV_TX_OK; - } - } - - return NETDEV_TX_BUSY; -} - -#else /* CONFIG_SBNI_MULTILINE */ - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - netif_stop_queue( dev ); - spin_lock( &nl->lock ); - - prepare_to_send( skb, dev ); - - spin_unlock( &nl->lock ); - return NETDEV_TX_OK; -} - -#endif /* CONFIG_SBNI_MULTILINE */ - -/* -------------------------------------------------------------------------- */ - -/* interrupt handler */ - -/* - * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not - * be looked as two independent single-channel devices. Every channel seems - * as Ethernet interface but interrupt handler must be common. Really, first - * channel ("master") driver only registers the handler. In its struct net_local - * it has got pointer to "slave" channel's struct net_local and handles that's - * interrupts too. - * dev of successfully attached ISA SBNI boards is linked to list. - * While next board driver is initialized, it scans this list. If one - * has found dev with same irq and ioaddr different by 4 then it assumes - * this board to be "master". 
- */ - -static irqreturn_t -sbni_interrupt( int irq, void *dev_id ) -{ - struct net_device *dev = dev_id; - struct net_local *nl = netdev_priv(dev); - int repeat; - - spin_lock( &nl->lock ); - if( nl->second ) - spin_lock(&NET_LOCAL_LOCK(nl->second)); - - do { - repeat = 0; - if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) { - handle_channel( dev ); - repeat = 1; - } - if( nl->second && /* second channel present */ - (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) { - handle_channel( nl->second ); - repeat = 1; - } - } while( repeat ); - - if( nl->second ) - spin_unlock(&NET_LOCAL_LOCK(nl->second)); - spin_unlock( &nl->lock ); - return IRQ_HANDLED; -} - - -static void -handle_channel( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - int req_ans; - unsigned char csr0; - -#ifdef CONFIG_SBNI_MULTILINE - /* Lock the master device because we going to change its local data */ - if( nl->state & FL_SLAVE ) - spin_lock(&NET_LOCAL_LOCK(nl->master)); -#endif - - outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 ); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - for(;;) { - csr0 = inb( ioaddr + CSR0 ); - if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 ) - break; - - req_ans = !(nl->state & FL_PREV_OK); - - if( csr0 & RC_RDY ) - req_ans = recv_frame( dev ); - - /* - * TR_RDY always equals 1 here because we have owned the marker, - * and we set TR_REQ when disabled interrupts - */ - csr0 = inb( ioaddr + CSR0 ); - if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) ) - netdev_err(dev, "internal error!\n"); - - /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ - if( req_ans || nl->tx_frameno != 0 ) - send_frame( dev ); - else - /* send marker without any data */ - outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 ); - } - - outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 ); - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - spin_unlock(&NET_LOCAL_LOCK(nl->master)); -#endif -} - - -/* - * Routine returns 1 if it needs to acknowledge received frame. - * Empty frame received without errors won't be acknowledged. - */ - -static int -recv_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - u32 crc = CRC32_INITIAL; - - unsigned framelen = 0, frameno, ack; - unsigned is_first, frame_ok = 0; - - if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) { - frame_ok = framelen > 4 - ? 
upload_data( dev, framelen, frameno, is_first, crc ) - : skip_tail( ioaddr, framelen, crc ); - if( frame_ok ) - interpret_ack( dev, ack ); - } - - outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 ); - if( frame_ok ) { - nl->state |= FL_PREV_OK; - if( framelen > 4 ) - nl->in_stats.all_rx_number++; - } else { - nl->state &= ~FL_PREV_OK; - change_level( dev ); - nl->in_stats.all_rx_number++; - nl->in_stats.bad_rx_number++; - } - - return !frame_ok || framelen > 4; -} - - -static void -send_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = CRC32_INITIAL; - - if( nl->state & FL_NEED_RESEND ) { - - /* if frame was sended but not ACK'ed - resend it */ - if( nl->trans_errors ) { - --nl->trans_errors; - if( nl->framelen != 0 ) - nl->in_stats.resend_tx_number++; - } else { - /* cannot xmit with many attempts */ -#ifdef CONFIG_SBNI_MULTILINE - if( (nl->state & FL_SLAVE) || nl->link ) -#endif - nl->state |= FL_LINE_DOWN; - drop_xmit_queue( dev ); - goto do_send; - } - } else - nl->trans_errors = TR_ERROR_COUNT; - - send_frame_header( dev, &crc ); - nl->state |= FL_NEED_RESEND; - /* - * FL_NEED_RESEND will be cleared after ACK, but if empty - * frame sended then in prepare_to_send next frame - */ - - - if( nl->framelen ) { - download_data( dev, &crc ); - nl->in_stats.all_tx_number++; - nl->state |= FL_WAIT_ACK; - } - - outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc ); - -do_send: - outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 ); - - if( nl->tx_frameno ) - /* next frame exists - we request card to send it */ - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, - dev->base_addr + CSR0 ); -} - - -/* - * Write the frame data into adapter's buffer memory, and calculate CRC. - * Do padding if necessary. - */ - -static void -download_data( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->tx_buf_p; - - unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen); - - outsb( dev->base_addr + DAT, skb->data + nl->outpos, len ); - *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len ); - - /* if packet too short we should write some more bytes to pad */ - for( len = nl->framelen - len; len--; ) { - outb( 0, dev->base_addr + DAT ); - *crc_p = CRC32( 0, *crc_p ); - } -} - - -static int -upload_data( struct net_device *dev, unsigned framelen, unsigned frameno, - unsigned is_first, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - int frame_ok; - - if( is_first ) { - nl->wait_frameno = frameno; - nl->inppos = 0; - } - - if( nl->wait_frameno == frameno ) { - - if( nl->inppos + framelen <= ETHER_MAX_LEN ) - frame_ok = append_frame_to_pkt( dev, framelen, crc ); - - /* - * if CRC is right but framelen incorrect then transmitter - * error was occurred... drop entire packet - */ - else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc )) - != 0 ) { - nl->wait_frameno = 0; - nl->inppos = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_missed_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_missed_errors++; -#endif - } - /* now skip all frames until is_first != 0 */ - } else - frame_ok = skip_tail( dev->base_addr, framelen, crc ); - - if( is_first && !frame_ok ) { - /* - * Frame has been broken, but we had already stored - * is_first... Drop entire packet. 
- */ - nl->wait_frameno = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_crc_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_crc_errors++; -#endif - } - - return frame_ok; -} - - -static inline void -send_complete( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_packets++; - nl->master->stats.tx_bytes += nl->tx_buf_p->len; -#else - dev->stats.tx_packets++; - dev->stats.tx_bytes += nl->tx_buf_p->len; -#endif - dev_consume_skb_irq(nl->tx_buf_p); - - nl->tx_buf_p = NULL; - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->framelen = 0; -} - - -static void -interpret_ack( struct net_device *dev, unsigned ack ) -{ - struct net_local *nl = netdev_priv(dev); - - if( ack == FRAME_SENT_OK ) { - nl->state &= ~FL_NEED_RESEND; - - if( nl->state & FL_WAIT_ACK ) { - nl->outpos += nl->framelen; - - if( --nl->tx_frameno ) { - nl->framelen = min_t(unsigned int, - nl->maxframe, - nl->tx_buf_p->len - nl->outpos); - } else { - send_complete( dev ); -#ifdef CONFIG_SBNI_MULTILINE - netif_wake_queue( nl->master ); -#else - netif_wake_queue( dev ); -#endif - } - } - } - - nl->state &= ~FL_WAIT_ACK; -} - - -/* - * Glue received frame with previous fragments of packet. - * Indicate packet when last frame would be accepted. - */ - -static int -append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - u8 *p; - - if( nl->inppos + framelen > ETHER_MAX_LEN ) - return 0; - - if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) ) - return 0; - - p = nl->rx_buf_p->data + nl->inppos; - insb( dev->base_addr + DAT, p, framelen ); - if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER ) - return 0; - - nl->inppos += framelen - 4; - if( --nl->wait_frameno == 0 ) /* last frame received */ - indicate_pkt( dev ); - - return 1; -} - - -/* - * Prepare to start output on adapter. - * Transmitter will be actually activated when marker is accepted. - */ - -static void -prepare_to_send( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - unsigned int len; - - /* nl->tx_buf_p == NULL here! */ - if( nl->tx_buf_p ) - netdev_err(dev, "memory leak!\n"); - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - - len = skb->len; - if( len < SBNI_MIN_LEN ) - len = SBNI_MIN_LEN; - - nl->tx_buf_p = skb; - nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe); - nl->framelen = len < nl->maxframe ? 
len : nl->maxframe; - - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 ); -#ifdef CONFIG_SBNI_MULTILINE - netif_trans_update(nl->master); -#else - netif_trans_update(dev); -#endif -} - - -static void -drop_xmit_queue( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->tx_buf_p ) { - dev_kfree_skb_any( nl->tx_buf_p ); - nl->tx_buf_p = NULL; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_errors++; - nl->master->stats.tx_carrier_errors++; -#else - dev->stats.tx_errors++; - dev->stats.tx_carrier_errors++; -#endif - } - - nl->tx_frameno = 0; - nl->framelen = 0; - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); -#ifdef CONFIG_SBNI_MULTILINE - netif_start_queue( nl->master ); - netif_trans_update(nl->master); -#else - netif_start_queue( dev ); - netif_trans_update(dev); -#endif -} - - -static void -send_frame_header( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = *crc_p; - u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */ - u8 value; - - if( nl->state & FL_NEED_RESEND ) - len_field |= FRAME_RETRY; /* non-first attempt... */ - - if( nl->outpos == 0 ) - len_field |= FRAME_FIRST; - - len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; - outb( SBNI_SIG, dev->base_addr + DAT ); - - value = (u8) len_field; - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - value = (u8) (len_field >> 8); - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - - outb( nl->tx_frameno, dev->base_addr + DAT ); - crc = CRC32( nl->tx_frameno, crc ); - outb( 0, dev->base_addr + DAT ); - crc = CRC32( 0, crc ); - *crc_p = crc; -} - - -/* - * if frame tail not needed (incorrect number or received twice), - * it won't store, but CRC will be calculated - */ - -static int -skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc ) -{ - while( tail_len-- ) - crc = CRC32( inb( ioaddr + DAT ), crc ); - - return crc == CRC32_REMAINDER; -} - - -/* - * Preliminary checks if frame header is correct, calculates its CRC - * and split it to simple fields - */ - -static int -check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack, - u32 *is_first, u32 *crc_p ) -{ - u32 crc = *crc_p; - u8 value; - - if( inb( ioaddr + DAT ) != SBNI_SIG ) - return 0; - - value = inb( ioaddr + DAT ); - *framelen = (u32)value; - crc = CRC32( value, crc ); - value = inb( ioaddr + DAT ); - *framelen |= ((u32)value) << 8; - crc = CRC32( value, crc ); - - *ack = *framelen & FRAME_ACK_MASK; - *is_first = (*framelen & FRAME_FIRST) != 0; - - if( (*framelen &= FRAME_LEN_MASK) < 6 || - *framelen > SBNI_MAX_FRAME - 3 ) - return 0; - - value = inb( ioaddr + DAT ); - *frameno = (u32)value; - crc = CRC32( value, crc ); - - crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */ - *framelen -= 2; - - *crc_p = crc; - return 1; -} - - -static struct sk_buff * -get_rx_buf( struct net_device *dev ) -{ - /* +2 is to compensate for the alignment fixup below */ - struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 ); - if( !skb ) - return NULL; - - skb_reserve( skb, 2 ); /* Align IP on longword boundaries */ - return skb; -} - - -static void -indicate_pkt( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->rx_buf_p; - - skb_put( skb, nl->inppos ); - -#ifdef CONFIG_SBNI_MULTILINE - skb->protocol = eth_type_trans( skb, nl->master ); - netif_rx( skb ); - ++nl->master->stats.rx_packets; - nl->master->stats.rx_bytes += nl->inppos; 
-#else - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); - ++dev->stats.rx_packets; - dev->stats.rx_bytes += nl->inppos; -#endif - nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */ -} - - -/* -------------------------------------------------------------------------- */ - -/* - * Routine checks periodically wire activity and regenerates marker if - * connect was inactive for a long time. - */ - -static void -sbni_watchdog(struct timer_list *t) -{ - struct net_local *nl = from_timer(nl, t, watchdog); - struct net_device *dev = nl->watchdog_dev; - unsigned long flags; - unsigned char csr0; - - spin_lock_irqsave( &nl->lock, flags ); - - csr0 = inb( dev->base_addr + CSR0 ); - if( csr0 & RC_CHK ) { - - if( nl->timer_ticks ) { - if( csr0 & (RC_RDY | BU_EMP) ) - /* receiving not active */ - nl->timer_ticks--; - } else { - nl->in_stats.timeout_number++; - if( nl->delta_rxl ) - timeout_change_level( dev ); - - outb( *(u_char *)&nl->csr1 | PR_RES, - dev->base_addr + CSR1 ); - csr0 = inb( dev->base_addr + CSR0 ); - } - } else - nl->state &= ~FL_LINE_DOWN; - - outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); - - mod_timer(t, jiffies + SBNI_TIMEOUT); - - spin_unlock_irqrestore( &nl->lock, flags ); -} - - -static unsigned char rxl_tab[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f -}; - -#define SIZE_OF_TIMEOUT_RXL_TAB 4 -static unsigned char timeout_rxl_tab[] = { - 0x03, 0x05, 0x08, 0x0b -}; - -/* -------------------------------------------------------------------------- */ - -static void -card_start( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->state |= FL_PREV_OK; - - nl->inppos = nl->outpos = 0; - nl->wait_frameno = 0; - nl->tx_frameno = 0; - nl->framelen = 0; - - outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - outb( EN_INT, dev->base_addr + CSR0 ); -} - -/* -------------------------------------------------------------------------- */ - -/* Receive level auto-selection */ - -static void -change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */ - return; - - if( nl->cur_rxl_index == 0 ) - nl->delta_rxl = 1; - else if( nl->cur_rxl_index == 15 ) - nl->delta_rxl = -1; - else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd ) - nl->delta_rxl = -nl->delta_rxl; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ]; - inb( dev->base_addr + CSR0 ); /* needs for PCI cards */ - outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - - -static void -timeout_change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ]; - if( ++nl->timeout_rxl >= 4 ) - nl->timeout_rxl = 0; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - inb( dev->base_addr + CSR0 ); - outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - -/* -------------------------------------------------------------------------- */ - -/* - * Open/initialize the board. 
- */ - -static int -sbni_open( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct timer_list *w = &nl->watchdog; - - /* - * For double ISA adapters within "common irq" mode, we have to - * determine whether primary or secondary channel is initialized, - * and set the irq handler only in first case. - */ - if( dev->base_addr < 0x400 ) { /* ISA only */ - struct net_device **p = sbni_cards; - for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p ) - if( (*p)->irq == dev->irq && - ((*p)->base_addr == dev->base_addr + 4 || - (*p)->base_addr == dev->base_addr - 4) && - (*p)->flags & IFF_UP ) { - - ((struct net_local *) (netdev_priv(*p))) - ->second = dev; - netdev_notice(dev, "using shared irq with %s\n", - (*p)->name); - nl->state |= FL_SECONDARY; - goto handler_attached; - } - } - - if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) { - netdev_err(dev, "unable to get IRQ %d\n", dev->irq); - return -EAGAIN; - } - -handler_attached: - - spin_lock( &nl->lock ); - memset( &dev->stats, 0, sizeof(struct net_device_stats) ); - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - - card_start( dev ); - - netif_start_queue( dev ); - - /* set timer watchdog */ - nl->watchdog_dev = dev; - timer_setup(w, sbni_watchdog, 0); - w->expires = jiffies + SBNI_TIMEOUT; - add_timer( w ); - - spin_unlock( &nl->lock ); - return 0; -} - - -static int -sbni_close( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->second && nl->second->flags & IFF_UP ) { - netdev_notice(dev, "Secondary channel (%s) is active!\n", - nl->second->name); - return -EBUSY; - } - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - emancipate( dev ); - else - while( nl->link ) /* it's master device! */ - emancipate( nl->link ); -#endif - - spin_lock( &nl->lock ); - - nl->second = NULL; - drop_xmit_queue( dev ); - netif_stop_queue( dev ); - - del_timer( &nl->watchdog ); - - outb( 0, dev->base_addr + CSR0 ); - - if( !(nl->state & FL_SECONDARY) ) - free_irq( dev->irq, dev ); - nl->state &= FL_SECONDARY; - - spin_unlock( &nl->lock ); - return 0; -} - - -/* - Valid combinations in CSR0 (for probing): - - VALID_DECODER 0000,0011,1011,1010 - - ; 0 ; - - TR_REQ ; 1 ; + - TR_RDY ; 2 ; - - TR_RDY TR_REQ ; 3 ; + - BU_EMP ; 4 ; + - BU_EMP TR_REQ ; 5 ; + - BU_EMP TR_RDY ; 6 ; - - BU_EMP TR_RDY TR_REQ ; 7 ; + - RC_RDY ; 8 ; + - RC_RDY TR_REQ ; 9 ; + - RC_RDY TR_RDY ; 10 ; - - RC_RDY TR_RDY TR_REQ ; 11 ; - - RC_RDY BU_EMP ; 12 ; - - RC_RDY BU_EMP TR_REQ ; 13 ; - - RC_RDY BU_EMP TR_RDY ; 14 ; - - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - -*/ - -#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) - - -static int -sbni_card_probe( unsigned long ioaddr ) -{ - unsigned char csr0; - - csr0 = inb( ioaddr + CSR0 ); - if( csr0 != 0xff && csr0 != 0x00 ) { - csr0 &= ~EN_INT; - if( csr0 & BU_EMP ) - csr0 |= EN_INT; - - if( VALID_DECODER & (1 << (csr0 >> 4)) ) - return 0; - } - - return -ENODEV; -} - -/* -------------------------------------------------------------------------- */ - -static int -sbni_ioctl( struct net_device *dev, struct ifreq *ifr, int cmd ) -{ - struct net_local *nl = netdev_priv(dev); - struct sbni_flags flags; - int error = 0; - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *slave_dev; - char slave_name[ 8 ]; -#endif - - switch( cmd ) { - case SIOCDEVGETINSTATS : - if (copy_to_user( ifr->ifr_data, &nl->in_stats, - sizeof(struct sbni_in_stats) )) - error = -EFAULT; - break; - - case SIOCDEVRESINSTATS : - if (!capable(CAP_NET_ADMIN)) - 
return -EPERM; - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - break; - - case SIOCDEVGHWSTATE : - flags.mac_addr = *(u32 *)(dev->dev_addr + 3); - flags.rate = nl->csr1.rate; - flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0; - flags.rxl = nl->cur_rxl_index; - flags.fixed_rxl = nl->delta_rxl == 0; - - if (copy_to_user( ifr->ifr_data, &flags, sizeof flags )) - error = -EFAULT; - break; - - case SIOCDEVSHWSTATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - spin_lock( &nl->lock ); - flags = *(struct sbni_flags*) &ifr->ifr_ifru; - if( flags.fixed_rxl ) { - nl->delta_rxl = 0; - nl->cur_rxl_index = flags.rxl; - } else { - nl->delta_rxl = DEF_RXL_DELTA; - nl->cur_rxl_index = DEF_RXL; - } - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - nl->csr1.rate = flags.rate; - outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - spin_unlock( &nl->lock ); - break; - -#ifdef CONFIG_SBNI_MULTILINE - - case SIOCDEVENSLAVE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name )) - return -EFAULT; - slave_dev = dev_get_by_name(&init_net, slave_name ); - if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { - netdev_err(dev, "trying to enslave non-active device %s\n", - slave_name); - if (slave_dev) - dev_put(slave_dev); - return -EPERM; - } - - return enslave( dev, slave_dev ); - - case SIOCDEVEMANSIPATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - return emancipate( dev ); - -#endif /* CONFIG_SBNI_MULTILINE */ - - default : - return -EOPNOTSUPP; - } - - return error; -} - - -#ifdef CONFIG_SBNI_MULTILINE - -static int -enslave( struct net_device *dev, struct net_device *slave_dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct net_local *snl = netdev_priv(slave_dev); - - if( nl->state & FL_SLAVE ) /* This isn't master or free device */ - return -EBUSY; - - if( snl->state & FL_SLAVE ) /* That was already enslaved */ - return -EBUSY; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - - /* append to list */ - snl->link = nl->link; - nl->link = slave_dev; - snl->master = dev; - snl->state |= FL_SLAVE; - - /* Summary statistics of MultiLine operation will be stored - in master's counters */ - memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) ); - netif_stop_queue( slave_dev ); - netif_wake_queue( dev ); /* Now we are able to transmit */ - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name); - return 0; -} - - -static int -emancipate( struct net_device *dev ) -{ - struct net_local *snl = netdev_priv(dev); - struct net_device *p = snl->master; - struct net_local *nl = netdev_priv(p); - - if( !(snl->state & FL_SLAVE) ) - return -EINVAL; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - drop_xmit_queue( dev ); - - /* exclude from list */ - for(;;) { /* must be in list */ - struct net_local *t = netdev_priv(p); - if( t->link == dev ) { - t->link = snl->link; - break; - } - p = t->link; - } - - snl->link = NULL; - snl->master = dev; - snl->state &= ~FL_SLAVE; - - netif_start_queue( dev ); - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - - dev_put( dev ); - return 0; -} - -#endif - -static void -set_multicast_list( struct net_device *dev ) -{ - return; /* sbni always operate in promiscuos mode */ -} - - -#ifdef MODULE -module_param_hw_array(io, int, ioport, NULL, 0); -module_param_hw_array(irq, int, irq, NULL, 0); -module_param_array(baud, int, NULL, 0); -module_param_array(rxl, int, NULL, 0); 
-module_param_array(mac, int, NULL, 0); -module_param(skip_pci_probe, bool, 0); - -MODULE_LICENSE("GPL"); - - -int __init init_module( void ) -{ - struct net_device *dev; - int err; - - while( num < SBNI_MAX_NUM_CARDS ) { - dev = alloc_netdev(sizeof(struct net_local), "sbni%d", - NET_NAME_UNKNOWN, sbni_devsetup); - if( !dev) - break; - - sprintf( dev->name, "sbni%d", num ); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - break; - } - - if( register_netdev( dev ) ) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev( dev ); - break; - } - } - - return *sbni_cards ? 0 : -ENODEV; -} - -void -cleanup_module(void) -{ - int i; - - for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) { - struct net_device *dev = sbni_cards[i]; - if (dev != NULL) { - unregister_netdev(dev); - release_region(dev->base_addr, SBNI_IO_EXTENT); - free_netdev(dev); - } - } -} - -#else /* MODULE */ - -static int __init -sbni_setup( char *p ) -{ - int n, parm; - - if( *p++ != '(' ) - goto bad_param; - - for( n = 0, parm = 0; *p && n < 8; ) { - (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 ); - if( !*p || *p == ')' ) - return 1; - if( *p == ';' ) { - ++p; - ++n; - parm = 0; - } else if( *p++ != ',' ) { - break; - } else { - if( ++parm >= 5 ) - break; - } - } -bad_param: - pr_err("Error in sbni kernel parameter!\n"); - return 0; -} - -__setup( "sbni=", sbni_setup ); - -#endif /* MODULE */ - -/* -------------------------------------------------------------------------- */ - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - while( len-- ) - crc = CRC32( *p++, crc ); - - return crc; -} - -static u32 crc32tab[] __attribute__ ((aligned(8))) = { - 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, - 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, - 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, - 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, - 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, - 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, - 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, - 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, - 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, - 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, - 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, - 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, - 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, - 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, - 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, - 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, - 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, - 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, - 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, - 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, - 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, - 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, - 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, - 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, - 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, - 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, - 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, - 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, - 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, - 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, - 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, - 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, - 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, - 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, - 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, - 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, - 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, - 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 
0xBC69E96A, - 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, - 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, - 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, - 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, - 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, - 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, - 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, - 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, - 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, - 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, - 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, - 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, - 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, - 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, - 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, - 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, - 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, - 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, - 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, - 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, - 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, - 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, - 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, - 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, - 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, - 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 -}; - diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h deleted file mode 100644 index 84264510a8ed..000000000000 --- a/drivers/net/wan/sbni.h +++ /dev/null @@ -1,147 +0,0 @@ -/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0 - * Written 2001 Denis I.Timofeev (timofeev@granch.ru) - * This file is distributed under the GNU GPL - */ - -#ifndef SBNI_H -#define SBNI_H - -#ifdef SBNI_DEBUG -#define DP( A ) A -#else -#define DP( A ) -#endif - - -/* We don't have official vendor id yet... 
*/ -#define SBNI_PCI_VENDOR 0x55 -#define SBNI_PCI_DEVICE 0x9f - -#define ISA_MODE 0x00 -#define PCI_MODE 0x01 - -#define SBNI_IO_EXTENT 4 - -enum sbni_reg { - CSR0 = 0, - CSR1 = 1, - DAT = 2 -}; - -/* CSR0 mapping */ -enum { - BU_EMP = 0x02, - RC_CHK = 0x04, - CT_ZER = 0x08, - TR_REQ = 0x10, - TR_RDY = 0x20, - EN_INT = 0x40, - RC_RDY = 0x80 -}; - - -/* CSR1 mapping */ -#define PR_RES 0x80 - -struct sbni_csr1 { -#ifdef __LITTLE_ENDIAN_BITFIELD - u8 rxl : 5; - u8 rate : 2; - u8 : 1; -#else - u8 : 1; - u8 rate : 2; - u8 rxl : 5; -#endif -}; - -/* fields in frame header */ -#define FRAME_ACK_MASK (unsigned short)0x7000 -#define FRAME_LEN_MASK (unsigned short)0x03FF -#define FRAME_FIRST (unsigned short)0x8000 -#define FRAME_RETRY (unsigned short)0x0800 - -#define FRAME_SENT_BAD (unsigned short)0x4000 -#define FRAME_SENT_OK (unsigned short)0x3000 - - -/* state flags */ -enum { - FL_WAIT_ACK = 0x01, - FL_NEED_RESEND = 0x02, - FL_PREV_OK = 0x04, - FL_SLOW_MODE = 0x08, - FL_SECONDARY = 0x10, -#ifdef CONFIG_SBNI_MULTILINE - FL_SLAVE = 0x20, -#endif - FL_LINE_DOWN = 0x40 -}; - - -enum { - DEFAULT_IOBASEADDR = 0x210, - DEFAULT_INTERRUPTNUMBER = 5, - DEFAULT_RATE = 0, - DEFAULT_FRAME_LEN = 1012 -}; - -#define DEF_RXL_DELTA -1 -#define DEF_RXL 0xf - -#define SBNI_SIG 0x5a - -#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */ -#define SBNI_MAX_FRAME 1023 -#define ETHER_MAX_LEN 1518 - -#define SBNI_TIMEOUT (HZ/10) - -#define TR_ERROR_COUNT 32 -#define CHANGE_LEVEL_START_TICKS 4 - -#define SBNI_MAX_NUM_CARDS 16 - -/* internal SBNI-specific statistics */ -struct sbni_in_stats { - u32 all_rx_number; - u32 bad_rx_number; - u32 timeout_number; - u32 all_tx_number; - u32 resend_tx_number; -}; - -/* SBNI ioctl params */ -#define SIOCDEVGETINSTATS SIOCDEVPRIVATE -#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1 -#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2 -#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3 -#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4 -#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5 - - -/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */ -struct sbni_flags { - u32 rxl : 4; - u32 rate : 2; - u32 fixed_rxl : 1; - u32 slow_mode : 1; - u32 mac_addr : 24; -}; - -/* - * CRC-32 stuff - */ -#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF)) - /* CRC generator 0xEDB88320 */ - /* CRC remainder 0x2144DF1C */ - /* CRC initial value 0x00000000 */ -#define CRC32_REMAINDER 0x2144DF1C -#define CRC32_INITIAL 0x00000000 - -#ifndef __initdata -#define __initdata -#endif - -#endif - diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 4403e219ca03..eddd20aab691 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -124,14 +124,6 @@ static int sealevel_close(struct net_device *d) return 0; } -static int sealevel_ioctl(struct net_device *d, struct ifreq *ifr, int cmd) -{ - /* struct slvl_device *slvl=dev_to_chan(d); - * z8530_ioctl(d,&slvl->sync.chanA,ifr,cmd) - */ - return hdlc_ioctl(d, ifr, cmd); -} - /* Passed network frames, fire them downwind. 
*/ static netdev_tx_t sealevel_queue_xmit(struct sk_buff *skb, @@ -152,7 +144,7 @@ static const struct net_device_ops sealevel_ops = { .ndo_open = sealevel_open, .ndo_stop = sealevel_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = sealevel_ioctl, + .ndo_siocwandev = hdlc_ioctl, }; static int slvl_setup(struct slvl_device *sv, int iobase, int irq) diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index f22e48415e6f..5a9e262188ef 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -343,20 +343,17 @@ static int wanxl_attach(struct net_device *dev, unsigned short encoding, return 0; } -static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int wanxl_ioctl(struct net_device *dev, struct if_settings *ifs) { const size_t size = sizeof(sync_serial_settings); sync_serial_settings line; struct port *port = dev_to_port(dev); - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - - switch (ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: - ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_SYNC_SERIAL; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } memset(&line, 0, sizeof(line)); @@ -364,7 +361,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) line.clock_rate = 0; line.loopback = 0; - if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size)) + if (copy_to_user(ifs->ifs_ifsu.sync, &line, size)) return -EFAULT; return 0; @@ -374,7 +371,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev->flags & IFF_UP) return -EBUSY; - if (copy_from_user(&line, ifr->ifr_settings.ifs_ifsu.sync, + if (copy_from_user(&line, ifs->ifs_ifsu.sync, size)) return -EFAULT; @@ -389,7 +386,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -545,7 +542,7 @@ static const struct net_device_ops wanxl_ops = { .ndo_open = wanxl_open, .ndo_stop = wanxl_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = wanxl_ioctl, + .ndo_siocwandev = wanxl_ioctl, .ndo_get_stats = wanxl_get_stats, }; diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index fd37d4d2983b..65dd8cff1b01 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -1144,7 +1144,7 @@ static int waitbusy(struct airo_info *ai); static irqreturn_t airo_interrupt(int irq, void* dev_id); static int airo_thread(void *data); static void timer_func(struct net_device *dev); -static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *, int cmd); static struct iw_statistics *airo_get_wireless_stats(struct net_device *dev); #ifdef CISCO_EXT static int readrids(struct net_device *dev, aironet_ioctl *comp); @@ -2664,7 +2664,7 @@ static const struct net_device_ops airo11_netdev_ops = { .ndo_start_xmit = airo_start_xmit11, .ndo_get_stats = airo_get_stats, .ndo_set_mac_address = airo_set_mac_address, - .ndo_do_ioctl = airo_ioctl, + .ndo_siocdevprivate = airo_siocdevprivate, }; static void wifi_setup(struct net_device *dev) @@ -2764,7 +2764,7 @@ static const struct net_device_ops airo_netdev_ops = { .ndo_get_stats = airo_get_stats, .ndo_set_rx_mode = airo_set_multicast_list, 
.ndo_set_mac_address = airo_set_mac_address, - .ndo_do_ioctl = airo_ioctl, + .ndo_siocdevprivate = airo_siocdevprivate, .ndo_validate_addr = eth_validate_addr, }; @@ -2775,7 +2775,7 @@ static const struct net_device_ops mpi_netdev_ops = { .ndo_get_stats = airo_get_stats, .ndo_set_rx_mode = airo_set_multicast_list, .ndo_set_mac_address = airo_set_mac_address, - .ndo_do_ioctl = airo_ioctl, + .ndo_siocdevprivate = airo_siocdevprivate, .ndo_validate_addr = eth_validate_addr, }; @@ -7661,7 +7661,8 @@ static const struct iw_handler_def airo_handler_def = * Javier Achirica did a great job of merging code from the unnamed CISCO * developer that added support for flashing the card. */ -static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) { int rc = 0; struct airo_info *ai = dev->ml_priv; @@ -7678,7 +7679,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { int val = AIROMAGIC; aironet_ioctl com; - if (copy_from_user(&com, rq->ifr_data, sizeof(com))) + if (copy_from_user(&com, data, sizeof(com))) rc = -EFAULT; else if (copy_to_user(com.data, (char *)&val, sizeof(val))) rc = -EFAULT; @@ -7694,7 +7695,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) */ { aironet_ioctl com; - if (copy_from_user(&com, rq->ifr_data, sizeof(com))) { + if (copy_from_user(&com, data, sizeof(com))) { rc = -EFAULT; break; } diff --git a/drivers/net/wireless/intersil/hostap/hostap.h b/drivers/net/wireless/intersil/hostap/hostap.h index c4b81ff7d7e4..c17ab6dbbb53 100644 --- a/drivers/net/wireless/intersil/hostap/hostap.h +++ b/drivers/net/wireless/intersil/hostap/hostap.h @@ -93,6 +93,7 @@ extern const struct iw_handler_def hostap_iw_handler_def; extern const struct ethtool_ops prism2_ethtool_ops; int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); - +int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); #endif /* HOSTAP_H */ diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index 49766b285230..0a376f112db9 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -3941,7 +3941,8 @@ const struct iw_handler_def hostap_iw_handler_def = .get_wireless_stats = hostap_get_wireless_stats, }; - +/* Private ioctls (iwpriv) that have not yet been converted + * into new wireless extensions API */ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct iwreq *wrq = (struct iwreq *) ifr; @@ -3953,9 +3954,6 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) local = iface->local; switch (cmd) { - /* Private ioctls (iwpriv) that have not yet been converted - * into new wireless extensions API */ - case PRISM2_IOCTL_INQUIRE: if (!capable(CAP_NET_ADMIN)) ret = -EPERM; else ret = prism2_ioctl_priv_inquire(dev, (int *) wrq->u.name); @@ -4009,11 +4007,31 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) wrq->u.ap_addr.sa_data); break; #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} +/* Private ioctls that are not used with iwpriv; + * in SIOCDEVPRIVATE range */ +int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) +{ + struct iwreq *wrq = (struct iwreq *)ifr; + struct hostap_interface *iface; + 
local_info_t *local; + int ret = 0; - /* Private ioctls that are not used with iwpriv; - * in SIOCDEVPRIVATE range */ + iface = netdev_priv(dev); + local = iface->local; + + if (in_compat_syscall()) /* not implemented yet */ + return -EOPNOTSUPP; + switch (cmd) { #ifdef PRISM2_DOWNLOAD_SUPPORT case PRISM2_IOCTL_DOWNLOAD: if (!capable(CAP_NET_ADMIN)) ret = -EPERM; diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index de97b3304115..54f67b682b6a 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -797,6 +797,7 @@ static const struct net_device_ops hostap_netdev_ops = { .ndo_open = prism2_open, .ndo_stop = prism2_close, .ndo_do_ioctl = hostap_ioctl, + .ndo_siocdevprivate = hostap_siocdevprivate, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, .ndo_tx_timeout = prism2_tx_timeout, @@ -809,6 +810,7 @@ static const struct net_device_ops hostap_mgmt_netdev_ops = { .ndo_open = prism2_open, .ndo_stop = prism2_close, .ndo_do_ioctl = hostap_ioctl, + .ndo_siocdevprivate = hostap_siocdevprivate, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, .ndo_tx_timeout = prism2_tx_timeout, @@ -821,6 +823,7 @@ static const struct net_device_ops hostap_master_ops = { .ndo_open = prism2_open, .ndo_stop = prism2_close, .ndo_do_ioctl = hostap_ioctl, + .ndo_siocdevprivate = hostap_siocdevprivate, .ndo_set_mac_address = prism2_set_mac_address, .ndo_set_rx_mode = hostap_set_multicast_list, .ndo_tx_timeout = prism2_tx_timeout, diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig index de9384326bc8..77dbfc418bce 100644 --- a/drivers/net/wwan/Kconfig +++ b/drivers/net/wwan/Kconfig @@ -38,6 +38,18 @@ config MHI_WWAN_CTRL To compile this driver as a module, choose M here: the module will be called mhi_wwan_ctrl. +config MHI_WWAN_MBIM + tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems" + depends on MHI_BUS + help + MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems. + It implements MBIM over MHI, for IP data aggregation and muxing. + A default wwan0 network interface is created for MBIM data session + ID 0. Additional links can be created via the wwan rtnetlink type. + + To compile this driver as a module, choose M here: the module will be + called mhi_wwan_mbim.
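Before the driver body in the next hunk, it may help to see how the "data session ID" mentioned in the help text travels on the wire: MBIM tags each NDP16 datagram pointer table with an "IPS" signature whose top byte carries the session, and the driver masks it back out on receive. A small standalone illustration in userspace C, not part of the patch; the constants mirror the kernel's USB_CDC_MBIM_NDP16_IPS_SIGN and the driver's MBIM_NDP16_SIGN_MASK, and mbim_sign/mbim_session are illustrative helper names:

#include <stdint.h>
#include <stdio.h>

/* "IPS" signature of an MBIM IP-stream NDP16; byte 3 carries the data
 * session ID. The driver stores it little-endian on the wire (cpu_to_le32). */
#define MBIM_NDP16_IPS_SIGN  0x00535049u	/* 'I' 'P' 'S' */
#define MBIM_NDP16_SIGN_MASK 0x00ffffffu

static uint32_t mbim_sign(unsigned int session)
{
	/* what mbim_tx_fixup() does on transmit */
	return MBIM_NDP16_IPS_SIGN | (uint32_t)session << 24;
}

static unsigned int mbim_session(uint32_t sign)
{
	/* what mhi_mbim_rx() does on receive */
	return (sign & ~MBIM_NDP16_SIGN_MASK) >> 24;
}

int main(void)
{
	unsigned int s;

	for (s = 0; s < 3; s++)
		printf("session %u -> signature 0x%08x -> session %u\n",
		       s, mbim_sign(s), mbim_session(mbim_sign(s)));
	return 0;
}

This per-NDP session byte is why a small hash of links keyed by session (the MHI_MBIM_LINK_HASH_SIZE buckets further down) is enough to route each received datagram table to its wwan link.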
+ config RPMSG_WWAN_CTRL tristate "RPMSG WWAN control driver" depends on RPMSG diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile index d90ac33abaef..fe51feedac21 100644 --- a/drivers/net/wwan/Makefile +++ b/drivers/net/wwan/Makefile @@ -9,5 +9,6 @@ wwan-objs += wwan_core.o obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o +obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o obj-$(CONFIG_IOSM) += iosm/ diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 7f7d364d3a51..2fe88b8be348 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -479,6 +479,7 @@ static struct pci_driver iosm_ipc_driver = { }, .id_table = iosm_ipc_ids, }; +module_pci_driver(iosm_ipc_driver); int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data, size_t size, dma_addr_t *mapping, int direction) @@ -560,21 +561,3 @@ void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb) IPC_CB(skb)->mapping = 0; dev_kfree_skb(skb); } - -static int __init iosm_ipc_driver_init(void) -{ - if (pci_register_driver(&iosm_ipc_driver)) { - pr_err("registering of IOSM PCIe driver failed"); - return -1; - } - - return 0; -} - -static void __exit iosm_ipc_driver_exit(void) -{ - pci_unregister_driver(&iosm_ipc_driver); -} - -module_init(iosm_ipc_driver_init); -module_exit(iosm_ipc_driver_exit); diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c new file mode 100644 index 000000000000..f37232fb29c0 --- /dev/null +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* MHI MBIM Network driver - Network/MBIM over MHI bus + * + * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> + * + * This driver copies some code from cdc_ncm, which is: + * Copyright (C) ST-Ericsson 2010-2012 + * and cdc_mbim, which is: + * Copyright (c) 2012 Smith Micro Software, Inc. + * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> + * + */ + +#include <linux/ethtool.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/mhi.h> +#include <linux/mii.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/u64_stats_sync.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/cdc_ncm.h> +#include <linux/wwan.h> + +/* An MRU of 3500 optimizes skb allocation: the skbs will basically fit in + * one 4K page. Large MBIM packets will simply be split over several MHI + * transfers and chained by the MHI net layer (zerocopy).
+ */ +#define MHI_DEFAULT_MRU 3500 + +#define MHI_MBIM_DEFAULT_MTU 1500 +#define MHI_MAX_BUF_SZ 0xffff + +#define MBIM_NDP16_SIGN_MASK 0x00ffffff + +#define MHI_MBIM_LINK_HASH_SIZE 8 +#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE) + +struct mhi_mbim_link { + struct mhi_mbim_context *mbim; + struct net_device *ndev; + unsigned int session; + + /* stats */ + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; + + struct hlist_node hlnode; +}; + +struct mhi_mbim_context { + struct mhi_device *mdev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + unsigned int mru; + u32 rx_queue_sz; + u16 rx_seq; + u16 tx_seq; + struct delayed_work rx_refill; + spinlock_t tx_lock; + struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE]; +}; + +struct mbim_tx_hdr { + struct usb_cdc_ncm_nth16 nth16; + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16[2]; +} __packed; + +static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim, + unsigned int session) +{ + struct mhi_mbim_link *link; + + hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) { + if (link->session == session) + return link; + } + + return NULL; +} + +static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session, + u16 tx_seq) +{ + unsigned int dgram_size = skb->len; + struct usb_cdc_ncm_nth16 *nth16; + struct usb_cdc_ncm_ndp16 *ndp16; + struct mbim_tx_hdr *mbim_hdr; + + /* Only one NDP is sent, containing the IP packet (no aggregation) */ + + /* Ensure we have enough headroom for crafting MBIM header */ + if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { + dev_kfree_skb_any(skb); + return NULL; + } + + mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); + + /* Fill NTB header */ + nth16 = &mbim_hdr->nth16; + nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); + nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + nth16->wSequence = cpu_to_le16(tx_seq); + nth16->wBlockLength = cpu_to_le16(skb->len); + nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + + /* Fill the unique NDP */ + ndp16 = &mbim_hdr->ndp16; + ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24)); + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + + sizeof(struct usb_cdc_ncm_dpe16) * 2); + ndp16->wNextNdpIndex = 0; + + /* Datagram follows the mbim header */ + ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); + ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); + + /* null termination */ + ndp16->dpe16[1].wDatagramIndex = 0; + ndp16->dpe16[1].wDatagramLength = 0; + + return skb; +} + +static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = link->mbim; + unsigned long flags; + int err = -ENOMEM; + + /* Serialize MHI channel queuing and MBIM seq */ + spin_lock_irqsave(&mbim->tx_lock, flags); + + skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq); + if (unlikely(!skb)) + goto exit_unlock; + + err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); + + if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_stop_queue(ndev); + + if (!err) + mbim->tx_seq++; + +exit_unlock: + spin_unlock_irqrestore(&mbim->tx_lock, flags); + + if 
(unlikely(err)) { + net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", + ndev->name, err); + dev_kfree_skb_any(skb); + goto exit_drop; + } + + return NETDEV_TX_OK; + +exit_drop: + u64_stats_update_begin(&link->tx_syncp); + u64_stats_inc(&link->tx_dropped); + u64_stats_update_end(&link->tx_syncp); + + return NETDEV_TX_OK; +} + +static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + struct usb_cdc_ncm_nth16 *nth16; + int len; + + if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + + sizeof(struct usb_cdc_ncm_ndp16)) { + net_err_ratelimited("frame too short\n"); + return -EINVAL; + } + + nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; + + if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { + net_err_ratelimited("invalid NTH16 signature <%#010x>\n", + le32_to_cpu(nth16->dwSignature)); + return -EINVAL; + } + + /* No limit on the block length, except the size of the data pkt */ + len = le16_to_cpu(nth16->wBlockLength); + if (len > skb->len) { + net_err_ratelimited("NTB does not fit into the skb %u/%u\n", + len, skb->len); + return -EINVAL; + } + + if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && + (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && + !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { + net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + mbim->rx_seq, le16_to_cpu(nth16->wSequence)); + } + mbim->rx_seq = le16_to_cpu(nth16->wSequence); + + return le16_to_cpu(nth16->wNdpIndex); +} + +static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) +{ + int ret; + + if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { + net_err_ratelimited("invalid DPT16 length <%u>\n", + le16_to_cpu(ndp16->wLength)); + return -EINVAL; + } + + ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) + / sizeof(struct usb_cdc_ncm_dpe16)); + ret--; /* Last entry is always a NULL terminator */ + + if (sizeof(struct usb_cdc_ncm_ndp16) + + ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { + net_err_ratelimited("Invalid nframes = %d\n", ret); + return -EINVAL; + } + + return ret; +} + +static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + int ndpoffset; + + /* Check NTB header and retrieve first NDP offset */ + ndpoffset = mbim_rx_verify_nth16(mbim, skb); + if (ndpoffset < 0) { + net_err_ratelimited("mbim: Incorrect NTB header\n"); + goto error; + } + + /* Process each NDP */ + while (1) { + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16; + struct mhi_mbim_link *link; + int nframes, n, dpeoffset; + unsigned int session; + + if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { + net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n", + ndpoffset); + goto error; + } + + /* Check NDP header and retrieve number of datagrams */ + nframes = mbim_rx_verify_ndp16(skb, &ndp16); + if (nframes < 0) { + net_err_ratelimited("mbim: Incorrect NDP16\n"); + goto error; + } + + /* Only IP data type supported, no DSS in MHI context */ + if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) + != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { + net_err_ratelimited("mbim: Unsupported NDP type\n"); + goto next_ndp; + } + + session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24; + + rcu_read_lock(); + + link = mhi_mbim_get_link_rcu(mbim, session); + if (!link) { + net_err_ratelimited("mbim: bad packet session (%u)\n", session); + goto unlock; + } + + /* de-aggregate and deliver IP packets */ + dpeoffset = ndpoffset + 
sizeof(struct usb_cdc_ncm_ndp16); + for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { + u16 dgram_offset, dgram_len; + struct sk_buff *skbn; + + if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) + break; + + dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); + dgram_len = le16_to_cpu(dpe16.wDatagramLength); + + if (!dgram_offset || !dgram_len) + break; /* null terminator */ + + skbn = netdev_alloc_skb(link->ndev, dgram_len); + if (!skbn) + continue; + + skb_put(skbn, dgram_len); + skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); + + switch (skbn->data[0] & 0xf0) { + case 0x40: + skbn->protocol = htons(ETH_P_IP); + break; + case 0x60: + skbn->protocol = htons(ETH_P_IPV6); + break; + default: + net_err_ratelimited("%s: unknown protocol\n", + link->ndev->name); + dev_kfree_skb_any(skbn); + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_errors); + u64_stats_update_end(&link->rx_syncp); + continue; + } + + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_packets); + u64_stats_add(&link->rx_bytes, skbn->len); + u64_stats_update_end(&link->rx_syncp); + + netif_rx(skbn); + } +unlock: + rcu_read_unlock(); +next_ndp: + /* Other NDP to process? */ + ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); + if (!ndpoffset) + break; + } + + /* free skb */ + dev_consume_skb_any(skb); + return; +error: + dev_kfree_skb_any(skb); +} + +static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim, + struct sk_buff *skb) +{ + struct sk_buff *head = mbim->skbagg_head; + struct sk_buff *tail = mbim->skbagg_tail; + + /* This is non-paged skb chaining using frag_list */ + if (!head) { + mbim->skbagg_head = skb; + return skb; + } + + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = skb; + else + tail->next = skb; + + head->len += skb->len; + head->data_len += skb->len; + head->truesize += skb->truesize; + + mbim->skbagg_tail = skb; + + return mbim->skbagg_head; +} + +static void mhi_net_rx_refill_work(struct work_struct *work) +{ + struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context, + rx_refill.work); + struct mhi_device *mdev = mbim->mdev; + int err; + + while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { + struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL); + + if (unlikely(!skb)) + break; + + err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, + MHI_DEFAULT_MRU, MHI_EOT); + if (unlikely(err)) { + kfree_skb(skb); + break; + } + + /* Do not hog the CPU if rx buffers are consumed faster than + * queued (unlikely). 
+ */ + cond_resched(); + } + + /* If we're still starved of rx buffers, reschedule later */ + if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz) + schedule_delayed_work(&mbim->rx_refill, HZ / 2); +} + +static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + int free_desc_count; + + free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + if (unlikely(mhi_res->transaction_status)) { + switch (mhi_res->transaction_status) { + case -EOVERFLOW: + /* Packet has been split over multiple transfers */ + skb_put(skb, mhi_res->bytes_xferd); + mhi_net_skb_agg(mbim, skb); + break; + case -ENOTCONN: + /* MHI layer stopping/resetting the DL channel */ + dev_kfree_skb_any(skb); + return; + default: + /* Unknown error, simply drop */ + dev_kfree_skb_any(skb); + } + } else { + skb_put(skb, mhi_res->bytes_xferd); + + if (mbim->skbagg_head) { + /* Aggregate the final fragment */ + skb = mhi_net_skb_agg(mbim, skb); + mbim->skbagg_head = NULL; + } + + mhi_mbim_rx(mbim, skb); + } + + /* Refill if RX buffers queue becomes low */ + if (free_desc_count >= mbim->rx_queue_sz / 2) + schedule_delayed_work(&mbim->rx_refill, 0); +} + +static void mhi_mbim_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&link->rx_syncp); + stats->rx_packets = u64_stats_read(&link->rx_packets); + stats->rx_bytes = u64_stats_read(&link->rx_bytes); + stats->rx_errors = u64_stats_read(&link->rx_errors); + } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start)); + + do { + start = u64_stats_fetch_begin_irq(&link->tx_syncp); + stats->tx_packets = u64_stats_read(&link->tx_packets); + stats->tx_bytes = u64_stats_read(&link->tx_bytes); + stats->tx_errors = u64_stats_read(&link->tx_errors); + stats->tx_dropped = u64_stats_read(&link->tx_dropped); + } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start)); +} + +static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + struct net_device *ndev = skb->dev; + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Hardware has consumed the buffer, so free the skb (which is not + * freed by the MHI stack) and perform accounting. + */ + dev_consume_skb_any(skb); + + u64_stats_update_begin(&link->tx_syncp); + if (unlikely(mhi_res->transaction_status)) { + /* MHI layer stopping/resetting the UL channel */ + if (mhi_res->transaction_status == -ENOTCONN) { + u64_stats_update_end(&link->tx_syncp); + return; + } + + u64_stats_inc(&link->tx_errors); + } else { + u64_stats_inc(&link->tx_packets); + u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd); + } + u64_stats_update_end(&link->tx_syncp); + + if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_wake_queue(ndev); +} + +static int mhi_mbim_ndo_open(struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Feed the MHI rx buffer pool */ + schedule_delayed_work(&link->mbim->rx_refill, 0); + + /* Carrier is established via out-of-band channel (e.g. 
qmi) */ + netif_carrier_on(ndev); + + netif_start_queue(ndev); + + return 0; +} + +static int mhi_mbim_ndo_stop(struct net_device *ndev) +{ + netif_stop_queue(ndev); + netif_carrier_off(ndev); + + return 0; +} + +static const struct net_device_ops mhi_mbim_ndo = { + .ndo_open = mhi_mbim_ndo_open, + .ndo_stop = mhi_mbim_ndo_stop, + .ndo_start_xmit = mhi_mbim_ndo_xmit, + .ndo_get_stats64 = mhi_mbim_ndo_get_stats64, +}; + +static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id, + struct netlink_ext_ack *extack) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = ctxt; + + link->session = if_id; + link->mbim = mbim; + link->ndev = ndev; + u64_stats_init(&link->rx_syncp); + u64_stats_init(&link->tx_syncp); + + rcu_read_lock(); + if (mhi_mbim_get_link_rcu(mbim, if_id)) { + rcu_read_unlock(); + return -EEXIST; + } + rcu_read_unlock(); + + /* Already protected by RTNL lock */ + hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]); + + return register_netdevice(ndev); +} + +static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev, + struct list_head *head) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + hlist_del_init_rcu(&link->hlnode); + synchronize_rcu(); + + unregister_netdevice_queue(ndev, head); +} + +static void mhi_mbim_setup(struct net_device *ndev) +{ + ndev->header_ops = NULL; /* No header */ + ndev->type = ARPHRD_RAWIP; + ndev->needed_headroom = sizeof(struct mbim_tx_hdr); + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->flags = IFF_POINTOPOINT | IFF_NOARP; + ndev->netdev_ops = &mhi_mbim_ndo; + ndev->mtu = MHI_MBIM_DEFAULT_MTU; + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom; + ndev->tx_queue_len = 1000; +} + +static const struct wwan_ops mhi_mbim_wwan_ops = { + .priv_size = sizeof(struct mhi_mbim_link), + .setup = mhi_mbim_setup, + .newlink = mhi_mbim_newlink, + .dellink = mhi_mbim_dellink, +}; + +static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + struct mhi_mbim_context *mbim; + int err; + + mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL); + if (!mbim) + return -ENOMEM; + + dev_set_drvdata(&mhi_dev->dev, mbim); + mbim->mdev = mhi_dev; + mbim->mru = mhi_dev->mhi_cntrl->mru ? 
mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU; + + INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work); + + /* Start MHI channels */ + err = mhi_prepare_for_transfer(mhi_dev, 0); + if (err) + return err; + + /* Number of transfer descriptors determines size of the queue */ + mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + /* Register wwan link ops with MHI controller representing WWAN instance */ + return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0); +} + +static void mhi_mbim_remove(struct mhi_device *mhi_dev) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + + mhi_unprepare_from_transfer(mhi_dev); + cancel_delayed_work_sync(&mbim->rx_refill); + wwan_unregister_ops(&cntrl->mhi_dev->dev); + kfree_skb(mbim->skbagg_head); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct mhi_device_id mhi_mbim_id_table[] = { + /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ + { .chan = "IP_HW0_MBIM", .driver_data = 0 }, + {} +}; +MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table); + +static struct mhi_driver mhi_mbim_driver = { + .probe = mhi_mbim_probe, + .remove = mhi_mbim_remove, + .dl_xfer_cb = mhi_mbim_dl_callback, + .ul_xfer_cb = mhi_mbim_ul_callback, + .id_table = mhi_mbim_id_table, + .driver = { + .name = "mhi_wwan_mbim", + .owner = THIS_MODULE, + }, +}; + +module_mhi_driver(mhi_mbim_driver); + +MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>"); +MODULE_DESCRIPTION("Network/MBIM over MHI"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 528745862738..c6b3334f24c9 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -38,7 +38,7 @@ #define NCI_OP_PROP_SET_PDATA_OID 0x23 struct fdp_nci_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; struct fdp_i2c_phy *phy; struct nci_dev *ndev; @@ -52,7 +52,7 @@ struct fdp_nci_info { u32 limited_otp_version; u8 key_index; - u8 *fw_vsc_cfg; + const u8 *fw_vsc_cfg; u8 clock_type; u32 clock_freq; @@ -65,7 +65,7 @@ struct fdp_nci_info { wait_queue_head_t setup_wq; }; -static u8 nci_core_get_config_otp_ram_version[5] = { +static const u8 nci_core_get_config_otp_ram_version[5] = { 0x04, NCI_PARAM_ID_FW_RAM_VERSION, NCI_PARAM_ID_FW_OTP_VERSION, @@ -111,7 +111,7 @@ static inline int fdp_nci_patch_cmd(struct nci_dev *ndev, u8 type) } static inline int fdp_nci_set_production_data(struct nci_dev *ndev, u8 len, - char *data) + const char *data) { return nci_prop_cmd(ndev, NCI_OP_PROP_SET_PDATA_OID, len, data); } @@ -236,7 +236,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) static int fdp_nci_open(struct nci_dev *ndev) { - struct fdp_nci_info *info = nci_get_drvdata(ndev); + const struct fdp_nci_info *info = nci_get_drvdata(ndev); return info->phy_ops->enable(info->phy); } @@ -260,7 +260,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev) { struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = &info->phy->i2c_dev->dev; - u8 *data; + const u8 *data; int r; r = request_firmware(&info->ram_patch, FDP_RAM_PATCH_NAME, dev); @@ -269,15 +269,15 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev) return r; } - data = (u8 *) info->ram_patch->data; + data = info->ram_patch->data; info->ram_patch_version = data[FDP_FW_HEADER_SIZE] | (data[FDP_FW_HEADER_SIZE + 1] << 8) | (data[FDP_FW_HEADER_SIZE + 2] << 16) | (data[FDP_FW_HEADER_SIZE + 3] << 24); - dev_dbg(dev, "RAM patch version: 
%d, size: %d\n", - info->ram_patch_version, (int) info->ram_patch->size); + dev_dbg(dev, "RAM patch version: %d, size: %zu\n", + info->ram_patch_version, info->ram_patch->size); r = request_firmware(&info->otp_patch, FDP_OTP_PATCH_NAME, dev); @@ -293,8 +293,8 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev) (data[FDP_FW_HEADER_SIZE+2] << 16) | (data[FDP_FW_HEADER_SIZE+3] << 24); - dev_dbg(dev, "OTP patch version: %d, size: %d\n", - info->otp_patch_version, (int) info->otp_patch->size); + dev_dbg(dev, "OTP patch version: %d, size: %zu\n", + info->otp_patch_version, info->otp_patch->size); return 0; } @@ -610,8 +610,9 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, { struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = &info->phy->i2c_dev->dev; - struct nci_core_get_config_rsp *rsp = (void *) skb->data; - u8 i, *p; + const struct nci_core_get_config_rsp *rsp = (void *) skb->data; + unsigned int i; + const u8 *p; if (rsp->status == NCI_STATUS_OK) { @@ -651,7 +652,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops fdp_core_ops[] = { +static const struct nci_driver_ops fdp_core_ops[] = { { .opcode = NCI_OP_CORE_GET_CONFIG_RSP, .rsp = fdp_nci_core_get_config_rsp_packet, @@ -662,7 +663,7 @@ static struct nci_driver_ops fdp_core_ops[] = { }, }; -static struct nci_driver_ops fdp_prop_ops[] = { +static const struct nci_driver_ops fdp_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROP, NCI_OP_PROP_PATCH_OID), .rsp = fdp_nci_prop_patch_rsp_packet, @@ -675,7 +676,7 @@ static struct nci_driver_ops fdp_prop_ops[] = { }, }; -static struct nci_ops nci_ops = { +static const struct nci_ops nci_ops = { .open = fdp_nci_open, .close = fdp_nci_close, .send = fdp_nci_send, @@ -687,10 +688,10 @@ static struct nci_ops nci_ops = { .n_core_ops = ARRAY_SIZE(fdp_core_ops), }; -int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, +int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops, struct nci_dev **ndevp, int tx_headroom, int tx_tailroom, u8 clock_type, u32 clock_freq, - u8 *fw_vsc_cfg) + const u8 *fw_vsc_cfg) { struct device *dev = &phy->i2c_dev->dev; struct fdp_nci_info *info; @@ -718,6 +719,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO15693_MASK; + BUILD_BUG_ON(ARRAY_SIZE(fdp_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndev = nci_allocate_device(&nci_ops, protocols, tx_headroom, tx_tailroom); if (!ndev) { diff --git a/drivers/nfc/fdp/fdp.h b/drivers/nfc/fdp/fdp.h index ead3b21ccae6..2e9161a4d7bf 100644 --- a/drivers/nfc/fdp/fdp.h +++ b/drivers/nfc/fdp/fdp.h @@ -21,9 +21,9 @@ struct fdp_i2c_phy { uint16_t next_read_size; }; -int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, +int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops, struct nci_dev **ndev, int tx_headroom, int tx_tailroom, - u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg); + u8 clock_type, u32 clock_freq, const u8 *fw_vsc_cfg); void fdp_nci_remove(struct nci_dev *ndev); #endif /* __LOCAL_FDP_H_ */ diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c5596e514648..051c43a2a52f 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -36,7 +36,7 @@ print_hex_dump(KERN_DEBUG, prefix": ", DUMP_PREFIX_OFFSET, \ 16, 1, (skb)->data, (skb)->len, 0) -static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy) +static void fdp_nci_i2c_reset(const struct fdp_i2c_phy *phy) { /* 
Reset RST/WakeUP for at least 100 micro-second */ gpiod_set_value_cansleep(phy->power_gpio, FDP_POWER_OFF); @@ -47,7 +47,7 @@ static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy) static int fdp_nci_i2c_enable(void *phy_id) { - struct fdp_i2c_phy *phy = phy_id; + const struct fdp_i2c_phy *phy = phy_id; fdp_nci_i2c_reset(phy); @@ -56,7 +56,7 @@ static int fdp_nci_i2c_enable(void *phy_id) static void fdp_nci_i2c_disable(void *phy_id) { - struct fdp_i2c_phy *phy = phy_id; + const struct fdp_i2c_phy *phy = phy_id; fdp_nci_i2c_reset(phy); } @@ -120,7 +120,7 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb) return r; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = fdp_nci_i2c_write, .enable = fdp_nci_i2c_enable, .disable = fdp_nci_i2c_disable, diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index e56cea716cd2..f9cca885beec 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -202,7 +202,7 @@ err: return r; } -static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length) +static int mei_nfc_send(struct nfc_mei_phy *phy, const u8 *buf, size_t length) { struct mei_nfc_hdr *hdr; u8 *mei_buf; @@ -362,7 +362,7 @@ static void nfc_mei_phy_disable(void *phy_id) phy->powered = 0; } -struct nfc_phy_ops mei_phy_ops = { +const struct nfc_phy_ops mei_phy_ops = { .write = nfc_mei_phy_write, .enable = nfc_mei_phy_enable, .disable = nfc_mei_phy_disable, diff --git a/drivers/nfc/mei_phy.h b/drivers/nfc/mei_phy.h index 51bd44f5f3b8..2b1edb3eba15 100644 --- a/drivers/nfc/mei_phy.h +++ b/drivers/nfc/mei_phy.h @@ -45,7 +45,7 @@ struct nfc_mei_phy { int hard_fault; }; -extern struct nfc_phy_ops mei_phy_ops; +extern const struct nfc_phy_ops mei_phy_ops; struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device); void nfc_mei_phy_free(struct nfc_mei_phy *phy); diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index dd78d987e6c9..86f593c73ed6 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -73,7 +73,7 @@ static void microread_i2c_remove_len_crc(struct sk_buff *skb) skb_trim(skb, MICROREAD_I2C_FRAME_TAILROOM); } -static int check_crc(struct sk_buff *skb) +static int check_crc(const struct sk_buff *skb) { int i; u8 crc = 0; @@ -225,7 +225,7 @@ static irqreturn_t microread_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = microread_i2c_write, .enable = microread_i2c_enable, .disable = microread_i2c_disable, diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c index b1d3975e8a81..9d83ccebd434 100644 --- a/drivers/nfc/microread/microread.c +++ b/drivers/nfc/microread/microread.c @@ -131,7 +131,7 @@ #define MICROREAD_ELT_ID_SE2 0x04 #define MICROREAD_ELT_ID_SE3 0x05 -static struct nfc_hci_gate microread_gates[] = { +static const struct nfc_hci_gate microread_gates[] = { {MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN}, {MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK}, {MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT}, @@ -152,7 +152,7 @@ static struct nfc_hci_gate microread_gates[] = { #define MICROREAD_CMD_TAILROOM 2 struct microread_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ -358,7 +358,7 @@ static int microread_complete_target_discovered(struct nfc_hci_dev *hdev, static void microread_im_transceive_cb(void *context, struct sk_buff 
*skb, int err) { - struct microread_info *info = context; + const struct microread_info *info = context; switch (info->async_cb_type) { case MICROREAD_CB_TYPE_READER_ALL: @@ -625,7 +625,7 @@ static int microread_event_received(struct nfc_hci_dev *hdev, u8 pipe, return r; } -static struct nfc_hci_ops microread_hci_ops = { +static const struct nfc_hci_ops microread_hci_ops = { .open = microread_open, .close = microread_close, .hci_ready = microread_hci_ready, @@ -641,9 +641,9 @@ static struct nfc_hci_ops microread_hci_ops = { .event_received = microread_event_received, }; -int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - struct nfc_hci_dev **hdev) +int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + const char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev) { struct microread_info *info; unsigned long quirks = 0; diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h index 044f5e456375..2ee7ccfa22dd 100644 --- a/drivers/nfc/microread/microread.h +++ b/drivers/nfc/microread/microread.h @@ -10,9 +10,9 @@ #define DRIVER_DESC "NFC driver for microread" -int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - struct nfc_hci_dev **hdev); +int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + const char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev); void microread_remove(struct nfc_hci_dev *hdev); diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index aaccb8b76b3e..edac56b01fd1 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -129,7 +129,7 @@ static void fw_dnld_timeout(struct timer_list *t) } static int process_state_reset(struct nfcmrvl_private *priv, - struct sk_buff *skb) + const struct sk_buff *skb) { if (sizeof(nci_pattern_core_reset_ntf) != skb->len || memcmp(skb->data, nci_pattern_core_reset_ntf, @@ -145,7 +145,8 @@ static int process_state_reset(struct nfcmrvl_private *priv, return 0; } -static int process_state_init(struct nfcmrvl_private *priv, struct sk_buff *skb) +static int process_state_init(struct nfcmrvl_private *priv, + const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; @@ -175,7 +176,7 @@ static void create_lc(struct nfcmrvl_private *priv) } static int process_state_set_ref_clock(struct nfcmrvl_private *priv, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; @@ -221,7 +222,7 @@ static int process_state_set_ref_clock(struct nfcmrvl_private *priv, } static int process_state_set_hi_config(struct nfcmrvl_private *priv, - struct sk_buff *skb) + const struct sk_buff *skb) { if (sizeof(nci_pattern_core_set_config_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len)) @@ -232,7 +233,7 @@ static int process_state_set_hi_config(struct nfcmrvl_private *priv, } static int process_state_open_lc(struct nfcmrvl_private *priv, - struct sk_buff *skb) + const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_create_rsp) >= skb->len || memcmp(skb->data, nci_pattern_core_conn_create_rsp, @@ -347,7 +348,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv, } static int process_state_close_lc(struct nfcmrvl_private *priv, - struct sk_buff *skb) + const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_close_rsp) != 
skb->len || memcmp(skb->data, nci_pattern_core_conn_close_rsp, skb->len)) @@ -358,7 +359,8 @@ static int process_state_close_lc(struct nfcmrvl_private *priv, return 0; } -static int process_state_boot(struct nfcmrvl_private *priv, struct sk_buff *skb) +static int process_state_boot(struct nfcmrvl_private *priv, + const struct sk_buff *skb) { if (sizeof(nci_pattern_proprietary_boot_rsp) != skb->len || memcmp(skb->data, nci_pattern_proprietary_boot_rsp, skb->len)) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 59a529e72d96..c38b228006fd 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -146,7 +146,7 @@ static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, { } -static struct nfcmrvl_if_ops i2c_ops = { +static const struct nfcmrvl_if_ops i2c_ops = { .nci_open = nfcmrvl_i2c_nci_open, .nci_close = nfcmrvl_i2c_nci_close, .nci_send = nfcmrvl_i2c_nci_send, @@ -182,8 +182,8 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, static int nfcmrvl_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + const struct nfcmrvl_platform_data *pdata; struct nfcmrvl_i2c_drv_data *drv_data; - struct nfcmrvl_platform_data *pdata; struct nfcmrvl_platform_data config; int ret; diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index a4620b480c4f..2fcf545012b1 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -81,7 +81,7 @@ static int nfcmrvl_nci_fw_download(struct nci_dev *ndev, return nfcmrvl_fw_dnld_start(ndev, firmware_name); } -static struct nci_ops nfcmrvl_nci_ops = { +static const struct nci_ops nfcmrvl_nci_ops = { .open = nfcmrvl_nci_open, .close = nfcmrvl_nci_close, .send = nfcmrvl_nci_send, @@ -91,9 +91,9 @@ static struct nci_ops nfcmrvl_nci_ops = { struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, void *drv_data, - struct nfcmrvl_if_ops *ops, + const struct nfcmrvl_if_ops *ops, struct device *dev, - struct nfcmrvl_platform_data *pdata) + const struct nfcmrvl_platform_data *pdata) { struct nfcmrvl_private *priv; int rc; diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index a715543bc9bf..165bd0a95190 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -77,7 +77,7 @@ struct nfcmrvl_private { /* PHY type */ enum nfcmrvl_phy phy; /* Low level driver ops */ - struct nfcmrvl_if_ops *if_ops; + const struct nfcmrvl_if_ops *if_ops; }; struct nfcmrvl_if_ops { @@ -92,9 +92,9 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb); struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, void *drv_data, - struct nfcmrvl_if_ops *ops, + const struct nfcmrvl_if_ops *ops, struct device *dev, - struct nfcmrvl_platform_data *pdata); + const struct nfcmrvl_platform_data *pdata); void nfcmrvl_chip_reset(struct nfcmrvl_private *priv); diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 66696321c645..b182ab2e03c0 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -99,7 +99,7 @@ static void nfcmrvl_spi_nci_update_config(struct nfcmrvl_private *priv, drv_data->nci_spi->xfer_speed_hz = config->clk; } -static struct nfcmrvl_if_ops spi_ops = { +static const struct nfcmrvl_if_ops spi_ops = { .nci_open = nfcmrvl_spi_nci_open, .nci_close = nfcmrvl_spi_nci_close, .nci_send = nfcmrvl_spi_nci_send, @@ -129,7 +129,7 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, static 
int nfcmrvl_spi_probe(struct spi_device *spi) { - struct nfcmrvl_platform_data *pdata; + const struct nfcmrvl_platform_data *pdata; struct nfcmrvl_platform_data config; struct nfcmrvl_spi_drv_data *drv_data; int ret = 0; diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 50d86c90b9dd..9c92cbdc42f0 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -49,7 +49,7 @@ static void nfcmrvl_uart_nci_update_config(struct nfcmrvl_private *priv, config->flow_control); } -static struct nfcmrvl_if_ops uart_ops = { +static const struct nfcmrvl_if_ops uart_ops = { .nci_open = nfcmrvl_uart_nci_open, .nci_close = nfcmrvl_uart_nci_close, .nci_send = nfcmrvl_uart_nci_send, @@ -98,8 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, static int nfcmrvl_nci_uart_open(struct nci_uart *nu) { struct nfcmrvl_private *priv; - struct nfcmrvl_platform_data *pdata = NULL; struct nfcmrvl_platform_data config; + const struct nfcmrvl_platform_data *pdata = NULL; struct device *dev = nu->tty->dev; /* diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index 9d649b45300b..a99aedff795d 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -264,7 +264,7 @@ done: return err; } -static struct nfcmrvl_if_ops usb_ops = { +static const struct nfcmrvl_if_ops usb_ops = { .nci_open = nfcmrvl_usb_nci_open, .nci_close = nfcmrvl_usb_nci_close, .nci_send = nfcmrvl_usb_nci_send, diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index dd27c85190d3..85bf8d586c70 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -239,7 +239,7 @@ static int nfcsim_send(struct nfc_digital_dev *ddev, struct sk_buff *skb, static void nfcsim_abort_cmd(struct nfc_digital_dev *ddev) { - struct nfcsim *dev = nfc_digital_get_drvdata(ddev); + const struct nfcsim *dev = nfc_digital_get_drvdata(ddev); nfcsim_link_recv_cancel(dev->link_in); } @@ -319,7 +319,7 @@ static int nfcsim_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, return nfcsim_send(ddev, NULL, timeout, cb, arg); } -static struct nfc_digital_ops nfcsim_digital_ops = { +static const struct nfc_digital_ops nfcsim_digital_ops = { .in_configure_hw = nfcsim_in_configure_hw, .in_send_cmd = nfcsim_in_send_cmd, diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 2b0c7232e91f..518e2afb43a8 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -83,7 +83,7 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return r; } -static struct nci_ops nxp_nci_ops = { +static const struct nci_ops nxp_nci_ops = { .open = nxp_nci_open, .close = nxp_nci_close, .send = nxp_nci_send, diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index cd64bfe20402..2f3f3fe9a0ba 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2623,7 +2623,7 @@ static int pn533_dev_down(struct nfc_dev *nfc_dev) return ret; } -static struct nfc_ops pn533_nfc_ops = { +static const struct nfc_ops pn533_nfc_ops = { .dev_up = pn533_dev_up, .dev_down = pn533_dev_down, .dep_link_up = pn533_dep_link_up, diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index de59e439c369..37d26f01986b 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -515,7 +515,7 @@ static irqreturn_t pn544_hci_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = pn544_hci_i2c_write, .enable = pn544_hci_i2c_enable, .disable = 
pn544_hci_i2c_disable, diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index b788870473e8..092f03b80a78 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -86,7 +86,7 @@ enum pn544_state { #define PN544_HCI_CMD_ATTREQUEST 0x12 #define PN544_HCI_CMD_CONTINUE_ACTIVATION 0x13 -static struct nfc_hci_gate pn544_gates[] = { +static const struct nfc_hci_gate pn544_gates[] = { {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, @@ -108,7 +108,7 @@ static struct nfc_hci_gate pn544_gates[] = { #define PN544_CMDS_HEADROOM 2 struct pn544_hci_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ -809,7 +809,7 @@ static int pn544_hci_discover_se(struct nfc_hci_dev *hdev) #define PN544_SE_MODE_ON 0x01 static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx) { - struct nfc_se *se; + const struct nfc_se *se; u8 enable = PN544_SE_MODE_ON; static struct uicc_gatelist { u8 head; @@ -864,7 +864,7 @@ static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx) static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx) { - struct nfc_se *se; + const struct nfc_se *se; u8 disable = PN544_SE_MODE_OFF; se = nfc_find_se(hdev->ndev, se_idx); @@ -881,7 +881,7 @@ static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx) } } -static struct nfc_hci_ops pn544_hci_ops = { +static const struct nfc_hci_ops pn544_hci_ops = { .open = pn544_hci_open, .close = pn544_hci_close, .hci_ready = pn544_hci_ready, @@ -901,9 +901,10 @@ static struct nfc_hci_ops pn544_hci_ops = { .disable_se = pn544_hci_disable_se, }; -int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - fw_download_t fw_download, struct nfc_hci_dev **hdev) +int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, fw_download_t fw_download, + struct nfc_hci_dev **hdev) { struct pn544_hci_info *info; u32 protocols; diff --git a/drivers/nfc/pn544/pn544.h b/drivers/nfc/pn544/pn544.h index 5634ba215ead..c6fe3e11e0c8 100644 --- a/drivers/nfc/pn544/pn544.h +++ b/drivers/nfc/pn544/pn544.h @@ -16,9 +16,10 @@ typedef int (*fw_download_t)(void *context, const char *firmware_name, u8 hw_variant); -int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - fw_download_t fw_download, struct nfc_hci_dev **hdev); +int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, fw_download_t fw_download, + struct nfc_hci_dev **hdev); void pn544_hci_remove(struct nfc_hci_dev *hdev); #endif /* __LOCAL_PN544_H_ */ diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 4df926cc37d0..517376c43b86 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -217,7 +217,7 @@ struct port100_protocol { u8 value; } __packed; -static struct port100_protocol +static const struct port100_protocol in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = { [NFC_DIGITAL_FRAMING_NFCA_SHORT] = { { PORT100_IN_PROT_INITIAL_GUARD_TIME, 6 }, @@ -391,7 +391,7 @@ in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = { }, }; -static struct port100_protocol +static const struct port100_protocol tg_protocols[][PORT100_TG_MAX_NUM_PROTOCOLS + 1] = { 
[NFC_DIGITAL_FRAMING_NFCA_SHORT] = { { PORT100_TG_PROT_END, 0 }, @@ -526,7 +526,7 @@ static inline u8 port100_checksum(u16 value) } /* The rule: sum(data elements) + checksum = 0 */ -static u8 port100_data_checksum(u8 *data, int datalen) +static u8 port100_data_checksum(const u8 *data, int datalen) { u8 sum = 0; int i; @@ -568,10 +568,10 @@ static void port100_tx_update_payload_len(void *_frame, int len) le16_add_cpu(&frame->datalen, len); } -static bool port100_rx_frame_is_valid(void *_frame) +static bool port100_rx_frame_is_valid(const void *_frame) { u8 checksum; - struct port100_frame *frame = _frame; + const struct port100_frame *frame = _frame; if (frame->start_frame != cpu_to_be16(PORT100_FRAME_SOF) || frame->extended_frame != cpu_to_be16(PORT100_FRAME_EXT)) @@ -589,23 +589,24 @@ static bool port100_rx_frame_is_valid(void *_frame) return true; } -static bool port100_rx_frame_is_ack(struct port100_ack_frame *frame) +static bool port100_rx_frame_is_ack(const struct port100_ack_frame *frame) { return (frame->start_frame == cpu_to_be16(PORT100_FRAME_SOF) && frame->ack_frame == cpu_to_be16(PORT100_FRAME_ACK)); } -static inline int port100_rx_frame_size(void *frame) +static inline int port100_rx_frame_size(const void *frame) { - struct port100_frame *f = frame; + const struct port100_frame *f = frame; return sizeof(struct port100_frame) + le16_to_cpu(f->datalen) + PORT100_FRAME_TAIL_LEN; } -static bool port100_rx_frame_is_cmd_response(struct port100 *dev, void *frame) +static bool port100_rx_frame_is_cmd_response(const struct port100 *dev, + const void *frame) { - struct port100_frame *f = frame; + const struct port100_frame *f = frame; return (PORT100_FRAME_CMD(f) == PORT100_CMD_RESPONSE(dev->cmd->code)); } @@ -655,7 +656,8 @@ sched_wq: schedule_work(&dev->cmd_complete_work); } -static int port100_submit_urb_for_response(struct port100 *dev, gfp_t flags) +static int port100_submit_urb_for_response(const struct port100 *dev, + gfp_t flags) { dev->in_urb->complete = port100_recv_response; @@ -666,7 +668,7 @@ static void port100_recv_ack(struct urb *urb) { struct port100 *dev = urb->context; struct port100_cmd *cmd = dev->cmd; - struct port100_ack_frame *in_frame; + const struct port100_ack_frame *in_frame; int rc; cmd->status = urb->status; @@ -708,7 +710,7 @@ sched_wq: schedule_work(&dev->cmd_complete_work); } -static int port100_submit_urb_for_ack(struct port100 *dev, gfp_t flags) +static int port100_submit_urb_for_ack(const struct port100 *dev, gfp_t flags) { dev->in_urb->complete = port100_recv_ack; @@ -753,8 +755,9 @@ static int port100_send_ack(struct port100 *dev) return rc; } -static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out, - struct sk_buff *in, int in_len) +static int port100_send_frame_async(struct port100 *dev, + const struct sk_buff *out, + const struct sk_buff *in, int in_len) { int rc; @@ -960,7 +963,7 @@ static void port100_abort_cmd(struct nfc_digital_dev *ddev) usb_kill_urb(dev->in_urb); } -static struct sk_buff *port100_alloc_skb(struct port100 *dev, unsigned int size) +static struct sk_buff *port100_alloc_skb(const struct port100 *dev, unsigned int size) { struct sk_buff *skb; @@ -1098,7 +1101,7 @@ static int port100_in_set_rf(struct nfc_digital_dev *ddev, u8 rf) static int port100_in_set_framing(struct nfc_digital_dev *ddev, int param) { struct port100 *dev = nfc_digital_get_drvdata(ddev); - struct port100_protocol *protocols; + const struct port100_protocol *protocols; struct sk_buff *skb; struct sk_buff *resp; int num_protocols; @@ 
-1152,7 +1155,7 @@ static int port100_in_configure_hw(struct nfc_digital_dev *ddev, int type, static void port100_in_comm_rf_complete(struct port100 *dev, void *arg, struct sk_buff *resp) { - struct port100_cb_arg *cb_arg = arg; + const struct port100_cb_arg *cb_arg = arg; nfc_digital_cmd_complete_t cb = cb_arg->complete_cb; u32 status; int rc; @@ -1255,7 +1258,7 @@ static int port100_tg_set_rf(struct nfc_digital_dev *ddev, u8 rf) static int port100_tg_set_framing(struct nfc_digital_dev *ddev, int param) { struct port100 *dev = nfc_digital_get_drvdata(ddev); - struct port100_protocol *protocols; + const struct port100_protocol *protocols; struct sk_buff *skb; struct sk_buff *resp; int rc; @@ -1330,7 +1333,7 @@ static void port100_tg_comm_rf_complete(struct port100 *dev, void *arg, struct sk_buff *resp) { u32 status; - struct port100_cb_arg *cb_arg = arg; + const struct port100_cb_arg *cb_arg = arg; nfc_digital_cmd_complete_t cb = cb_arg->complete_cb; struct port100_tg_comm_rf_res *hdr; @@ -1453,7 +1456,7 @@ static int port100_listen_mdaa(struct nfc_digital_dev *ddev, static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout, nfc_digital_cmd_complete_t cb, void *arg) { - struct port100 *dev = nfc_digital_get_drvdata(ddev); + const struct port100 *dev = nfc_digital_get_drvdata(ddev); struct sk_buff *skb; skb = port100_alloc_skb(dev, 0); @@ -1463,7 +1466,7 @@ static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout, return port100_tg_send_cmd(ddev, skb, timeout, cb, arg); } -static struct nfc_digital_ops port100_digital_ops = { +static const struct nfc_digital_ops port100_digital_ops = { .in_configure_hw = port100_in_configure_hw, .in_send_cmd = port100_in_send_cmd, diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 865d3e3d1528..1c412007fabb 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -143,11 +143,13 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) return nci_core_init(info->ndev); } -static struct nci_ops s3fwrn5_nci_ops = { +static const struct nci_ops s3fwrn5_nci_ops = { .open = s3fwrn5_nci_open, .close = s3fwrn5_nci_close, .send = s3fwrn5_nci_send, .post_setup = s3fwrn5_nci_post_setup, + .prop_ops = s3fwrn5_nci_prop_ops, + .n_prop_ops = ARRAY_SIZE(s3fwrn5_nci_prop_ops), }; int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, @@ -167,9 +169,6 @@ int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, s3fwrn5_set_mode(info, S3FWRN5_MODE_COLD); - s3fwrn5_nci_get_prop_ops(&s3fwrn5_nci_ops.prop_ops, - &s3fwrn5_nci_ops.n_prop_ops); - info->ndev = nci_allocate_device(&s3fwrn5_nci_ops, S3FWRN5_NFC_PROTOCOLS, 0, 0); if (!info->ndev) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index e3e72b8a29e3..1af7a1e632cf 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -421,10 +421,9 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) tfm = crypto_alloc_shash("sha1", 0, 0); if (IS_ERR(tfm)) { - ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, "Cannot allocate shash (code=%pe)\n", tfm); - goto out; + return PTR_ERR(tfm); } ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data); @@ -433,7 +432,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret) { dev_err(&fw_info->ndev->nfc_dev->dev, "Cannot compute hash (code=%d)\n", ret); - goto out; + return ret; } /* Firmware update process */ @@ -446,7 +445,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 
0) { dev_err(&fw_info->ndev->nfc_dev->dev, "Unable to enter update mode\n"); - goto out; + return ret; } for (off = 0; off < image_size; off += fw_info->sector_size) { @@ -455,7 +454,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 0) { dev_err(&fw_info->ndev->nfc_dev->dev, "Firmware update error (code=%d)\n", ret); - goto out; + return ret; } } @@ -463,13 +462,12 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 0) { dev_err(&fw_info->ndev->nfc_dev->dev, "Unable to complete update mode\n"); - goto out; + return ret; } dev_info(&fw_info->ndev->nfc_dev->dev, "Firmware update: success\n"); -out: return ret; } diff --git a/drivers/nfc/s3fwrn5/nci.c b/drivers/nfc/s3fwrn5/nci.c index f042d3eaf8f6..e374e670b36b 100644 --- a/drivers/nfc/s3fwrn5/nci.c +++ b/drivers/nfc/s3fwrn5/nci.c @@ -20,7 +20,7 @@ static int s3fwrn5_nci_prop_rsp(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = { +const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, NCI_PROP_SET_RFREG), @@ -43,12 +43,6 @@ static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = { }, }; -void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n) -{ - *ops = s3fwrn5_nci_prop_ops; - *n = ARRAY_SIZE(s3fwrn5_nci_prop_ops); -} - #define S3FWRN5_RFREG_SECTION_SIZE 252 int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) diff --git a/drivers/nfc/s3fwrn5/nci.h b/drivers/nfc/s3fwrn5/nci.h index a80f0fb082a8..c2d906591e9e 100644 --- a/drivers/nfc/s3fwrn5/nci.h +++ b/drivers/nfc/s3fwrn5/nci.h @@ -50,7 +50,7 @@ struct nci_prop_fw_cfg_rsp { __u8 status; }; -void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n); +extern const struct nci_driver_ops s3fwrn5_nci_prop_ops[4]; int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name); #endif /* __LOCAL_S3FWRN5_NCI_H_ */ diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c index 110ff1281e5f..72bb51efdf9c 100644 --- a/drivers/nfc/st-nci/core.c +++ b/drivers/nfc/st-nci/core.c @@ -86,7 +86,7 @@ static int st_nci_prop_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops st_nci_prop_ops[] = { +static const struct nci_driver_ops st_nci_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, ST_NCI_CORE_PROP), @@ -94,7 +94,7 @@ static struct nci_driver_ops st_nci_prop_ops[] = { }, }; -static struct nci_ops st_nci_ops = { +static const struct nci_ops st_nci_ops = { .init = st_nci_init, .open = st_nci_open, .close = st_nci_close, @@ -131,6 +131,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, | NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK; + BUILD_BUG_ON(ARRAY_SIZE(st_nci_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols, phy_headroom, phy_tailroom); if (!ndlc->ndev) { diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 46981405e8b1..ccf6152ebb9f 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -186,7 +186,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = st_nci_i2c_write, .enable = st_nci_i2c_enable, .disable = st_nci_i2c_disable, diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index 5d74c674368a..e9dc313b333e 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -253,9 +253,9 @@ 
static void ndlc_t2_timeout(struct timer_list *t) schedule_work(&ndlc->sm_work); } -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, - struct st_nci_se_status *se_status) +int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + struct device *dev, int phy_headroom, int phy_tailroom, + struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status) { struct llt_ndlc *ndlc; diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h index 066e2fd75238..c24ce9b0df52 100644 --- a/drivers/nfc/st-nci/ndlc.h +++ b/drivers/nfc/st-nci/ndlc.h @@ -16,7 +16,7 @@ struct st_nci_se_status; /* Low Level Transport description */ struct llt_ndlc { struct nci_dev *ndev; - struct nfc_phy_ops *ops; + const struct nfc_phy_ops *ops; void *phy_id; struct timer_list t1_timer; @@ -45,8 +45,8 @@ int ndlc_open(struct llt_ndlc *ndlc); void ndlc_close(struct llt_ndlc *ndlc); int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, - struct st_nci_se_status *se_status); +int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + struct device *dev, int phy_headroom, int phy_tailroom, + struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status); void ndlc_remove(struct llt_ndlc *ndlc); #endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 250d56f204c3..a620c34790e6 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -198,7 +198,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops spi_phy_ops = { +static const struct nfc_phy_ops spi_phy_ops = { .write = st_nci_spi_write, .enable = st_nci_spi_enable, .disable = st_nci_spi_disable, diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c index 94b600029a2a..30d2912d1a05 100644 --- a/drivers/nfc/st-nci/vendor_cmds.c +++ b/drivers/nfc/st-nci/vendor_cmds.c @@ -371,7 +371,7 @@ static int st_nci_manufacturer_specific(struct nfc_dev *dev, void *data, return nfc_vendor_cmd_reply(msg); } -static struct nfc_vendor_cmd st_nci_vendor_cmds[] = { +static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = { { .vendor_id = ST_NCI_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index 6ca0d2f56b18..5e6c99fcfd27 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -72,7 +72,7 @@ static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); -static struct nfc_hci_gate st21nfca_gates[] = { +static const struct nfc_hci_gate st21nfca_gates[] = { {NFC_HCI_ADMIN_GATE, NFC_HCI_ADMIN_PIPE}, {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_LINK_MGMT_PIPE}, {ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE}, @@ -912,7 +912,7 @@ static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, } } -static struct nfc_hci_ops st21nfca_hci_ops = { +static const struct nfc_hci_ops st21nfca_hci_ops = { .open = st21nfca_hci_open, .close = st21nfca_hci_close, .load_session = st21nfca_hci_load_session, @@ -935,7 +935,7 @@ static struct nfc_hci_ops st21nfca_hci_ops = { .se_io = st21nfca_hci_se_io, }; -int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, +int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, char *llc_name, int 
phy_headroom, int phy_tailroom, int phy_payload, struct nfc_hci_dev **hdev, struct st21nfca_se_status *se_status) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 7a9f4d71707e..1b44a37a71aa 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -76,8 +76,8 @@ struct st21nfca_i2c_phy { struct mutex phy_lock; }; -static u8 len_seq[] = { 16, 24, 12, 29 }; -static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; +static const u8 len_seq[] = { 16, 24, 12, 29 }; +static const u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; #define I2C_DUMP_SKB(info, skb) \ do { \ @@ -482,7 +482,7 @@ static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = st21nfca_hci_i2c_write, .enable = st21nfca_hci_i2c_enable, .disable = st21nfca_hci_i2c_disable, diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h index 5e0de0fef1d4..cb6ad916be91 100644 --- a/drivers/nfc/st21nfca/st21nfca.h +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -144,7 +144,7 @@ struct st21nfca_se_info { }; struct st21nfca_hci_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ -163,7 +163,7 @@ struct st21nfca_hci_info { struct st21nfca_vendor_info vendor_info; }; -int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, +int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, char *llc_name, int phy_headroom, int phy_tailroom, int phy_payload, struct nfc_hci_dev **hdev, struct st21nfca_se_status *se_status); diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c index 62332ca91554..74882866dbaf 100644 --- a/drivers/nfc/st21nfca/vendor_cmds.c +++ b/drivers/nfc/st21nfca/vendor_cmds.c @@ -295,7 +295,7 @@ exit: return r; } -static struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { +static const struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { { .vendor_id = ST21NFCA_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2dc788c363fd..993818742570 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1037,7 +1037,7 @@ static void st95hf_abort_cmd(struct nfc_digital_dev *ddev) { } -static struct nfc_digital_ops st95hf_nfc_digital_ops = { +static const struct nfc_digital_ops st95hf_nfc_digital_ops = { .in_configure_hw = st95hf_in_configure_hw, .in_send_cmd = st95hf_in_send_cmd, diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 33978022ae47..8890fcd59c39 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -643,7 +643,7 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno) } static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb, - unsigned int len, u8 *prefix, + unsigned int len, const u8 *prefix, unsigned int prefix_len) { struct spi_transfer t[2]; @@ -1387,9 +1387,10 @@ static int trf7970a_is_iso15693_write_or_lock(u8 cmd) } } -static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb) +static int trf7970a_per_cmd_config(struct trf7970a *trf, + const struct sk_buff *skb) { - u8 *req = skb->data; + const u8 *req = skb->data; u8 special_fcn_reg1, iso_ctrl; int ret; @@ -1791,7 +1792,7 @@ out_err: static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, nfc_digital_cmd_complete_t cb, void *arg) { - struct trf7970a *trf = nfc_digital_get_drvdata(ddev); + const struct trf7970a *trf = 
nfc_digital_get_drvdata(ddev); dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n", trf->state, timeout); @@ -1803,7 +1804,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev, u16 timeout, nfc_digital_cmd_complete_t cb, void *arg) { - struct trf7970a *trf = nfc_digital_get_drvdata(ddev); + const struct trf7970a *trf = nfc_digital_get_drvdata(ddev); int ret; dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n", @@ -1824,7 +1825,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev, static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech) { - struct trf7970a *trf = nfc_digital_get_drvdata(ddev); + const struct trf7970a *trf = nfc_digital_get_drvdata(ddev); dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n", trf->state, trf->md_rf_tech); @@ -1861,7 +1862,7 @@ static void trf7970a_abort_cmd(struct nfc_digital_dev *ddev) mutex_unlock(&trf->lock); } -static struct nfc_digital_ops trf7970a_nfc_ops = { +static const struct nfc_digital_ops trf7970a_nfc_ops = { .in_configure_hw = trf7970a_in_configure_hw, .in_send_cmd = trf7970a_send_cmd, .tg_configure_hw = trf7970a_tg_configure_hw, @@ -1974,7 +1975,7 @@ static void trf7970a_shutdown(struct trf7970a *trf) trf7970a_power_down(trf); } -static int trf7970a_get_autosuspend_delay(struct device_node *np) +static int trf7970a_get_autosuspend_delay(const struct device_node *np) { int autosuspend_delay, ret; @@ -1987,7 +1988,7 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np) static int trf7970a_probe(struct spi_device *spi) { - struct device_node *np = spi->dev.of_node; + const struct device_node *np = spi->dev.of_node; struct trf7970a *trf; int uvolts, autosuspend_delay, ret; u32 clk_freq = TRF7970A_13MHZ_CLOCK_FREQUENCY; diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index f73ee0bf3593..2ee0ec4bb739 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -65,7 +65,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -static struct nci_ops virtual_nci_ops = { +static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send @@ -170,7 +170,7 @@ static int virtual_ncidev_close(struct inode *inode, struct file *file) static long virtual_ncidev_ioctl(struct file *flip, unsigned int cmd, unsigned long arg) { - struct nfc_dev *nfc_dev = ndev->nfc_dev; + const struct nfc_dev *nfc_dev = ndev->nfc_dev; void __user *p = (void __user *)arg; if (cmd != IOCTL_GET_NCIDEV_IDX) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 8c20e524e9ad..8b08745e1ca1 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -157,6 +157,13 @@ config PTP_1588_CLOCK_OCP tristate "OpenCompute TimeCard as PTP clock" depends on PTP_1588_CLOCK depends on HAS_IOMEM && PCI + depends on SPI && I2C && MTD + imply SPI_MEM + imply SPI_XILINX + imply MTD_SPI_NOR + imply I2C_XILINX + select SERIAL_8250 + default n help This driver adds support for an OpenCompute time card. 
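The rewritten ptp_ocp.c below drops the fixed OCP/TOD register offsets in favor of a per-board table of ocp_resource descriptors attached to the PCI id through driver_data (PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource)); probe then walks the table and invokes each entry's setup callback. A minimal sketch of that pattern follows; the demo_* names are invented for illustration and are not the driver's actual symbols.

#include <linux/pci.h>

struct demo_card {
	void __iomem *regs;	/* whatever per-board state the setup hooks fill in */
};

struct demo_resource {
	unsigned long offset;	/* BAR-relative location of the sub-device */
	int (*setup)(struct demo_card *card, const struct demo_resource *r);
};

/* Walk the table bound to the matching PCI id; a NULL setup ends it. */
static int demo_setup_board(struct demo_card *card,
			    const struct pci_device_id *id)
{
	const struct demo_resource *r;
	int err;

	for (r = (const struct demo_resource *)id->driver_data; r->setup; r++) {
		err = r->setup(card, r);
		if (err)
			return err;	/* caller tears down whatever succeeded */
	}
	return 0;
}

One table per board variant keeps the probe path generic: supporting a new FPGA layout means adding a table, not another probe function.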
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0d1034e3ed0f..039d3a5c2a6f 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -6,15 +6,29 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> +#include <linux/serial_8250.h> +#include <linux/clkdev.h> +#include <linux/clk-provider.h> +#include <linux/platform_device.h> #include <linux/ptp_clock_kernel.h> +#include <linux/spi/spi.h> +#include <linux/spi/xilinx_spi.h> +#include <net/devlink.h> +#include <linux/i2c.h> +#include <linux/mtd/mtd.h> -static const struct pci_device_id ptp_ocp_pcidev_id[] = { - { PCI_DEVICE(0x1d9b, 0x0400) }, - { 0 } -}; -MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); +#ifndef PCI_VENDOR_ID_FACEBOOK +#define PCI_VENDOR_ID_FACEBOOK 0x1d9b +#endif -#define OCP_REGISTER_OFFSET 0x01000000 +#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD +#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400 +#endif + +static struct class timecard_class = { + .owner = THIS_MODULE, + .name = "timecard", +}; struct ocp_reg { u32 ctrl; @@ -29,18 +43,29 @@ struct ocp_reg { u32 __pad1[2]; u32 offset_ns; u32 offset_window_ns; + u32 __pad2[2]; + u32 drift_ns; + u32 drift_window_ns; + u32 __pad3[6]; + u32 servo_offset_p; + u32 servo_offset_i; + u32 servo_drift_p; + u32 servo_drift_i; }; #define OCP_CTRL_ENABLE BIT(0) #define OCP_CTRL_ADJUST_TIME BIT(1) #define OCP_CTRL_ADJUST_OFFSET BIT(2) +#define OCP_CTRL_ADJUST_DRIFT BIT(3) +#define OCP_CTRL_ADJUST_SERVO BIT(8) #define OCP_CTRL_READ_TIME_REQ BIT(30) #define OCP_CTRL_READ_TIME_DONE BIT(31) #define OCP_STATUS_IN_SYNC BIT(0) +#define OCP_STATUS_IN_HOLDOVER BIT(1) #define OCP_SELECT_CLK_NONE 0 -#define OCP_SELECT_CLK_REG 6 +#define OCP_SELECT_CLK_REG 0xfe struct tod_reg { u32 ctrl; @@ -55,8 +80,6 @@ struct tod_reg { u32 leap; }; -#define TOD_REGISTER_OFFSET 0x01050000 - #define TOD_CTRL_PROTOCOL BIT(28) #define TOD_CTRL_DISABLE_FMT_A BIT(17) #define TOD_CTRL_DISABLE_FMT_B BIT(16) @@ -68,16 +91,260 @@ struct tod_reg { #define TOD_STATUS_UTC_VALID BIT(8) #define TOD_STATUS_LEAP_VALID BIT(16) +struct ts_reg { + u32 enable; + u32 error; + u32 polarity; + u32 version; + u32 __pad0[4]; + u32 cable_delay; + u32 __pad1[3]; + u32 intr; + u32 intr_mask; + u32 event_count; + u32 __pad2[1]; + u32 ts_count; + u32 time_ns; + u32 time_sec; + u32 data_width; + u32 data; +}; + +struct pps_reg { + u32 ctrl; + u32 status; +}; + +#define PPS_STATUS_FILTER_ERR BIT(0) +#define PPS_STATUS_SUPERV_ERR BIT(1) + +struct img_reg { + u32 version; +}; + +struct ptp_ocp_flash_info { + const char *name; + int pci_offset; + int data_size; + void *data; +}; + +struct ptp_ocp_ext_info { + const char *name; + int index; + irqreturn_t (*irq_fcn)(int irq, void *priv); + int (*enable)(void *priv, bool enable); +}; + +struct ptp_ocp_ext_src { + void __iomem *mem; + struct ptp_ocp *bp; + struct ptp_ocp_ext_info *info; + int irq_vec; +}; + struct ptp_ocp { struct pci_dev *pdev; + struct device dev; spinlock_t lock; - void __iomem *base; struct ocp_reg __iomem *reg; struct tod_reg __iomem *tod; + struct pps_reg __iomem *pps_monitor; + struct ptp_ocp_ext_src *pps; + struct ptp_ocp_ext_src *ts0; + struct ptp_ocp_ext_src *ts1; + struct img_reg __iomem *image; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; + struct platform_device *i2c_ctrl; + struct platform_device *spi_flash; + struct clk_hw *i2c_clk; + struct devlink_health_reporter *health; + struct timer_list watchdog; + time64_t gps_lost; + int id; + int n_irqs; + int gps_port; + int mac_port; /* miniature atomic clock */ + u8 serial[6]; + 
int flash_start; + bool has_serial; + bool pending_image; +}; + +struct ocp_resource { + unsigned long offset; + int size; + int irq_vec; + int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r); + void *extra; + unsigned long bp_offset; +}; + +static void ptp_ocp_health_update(struct ptp_ocp *bp); +static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r); +static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv); +static int ptp_ocp_ts_enable(void *priv, bool enable); + +#define bp_assign_entry(bp, res, val) ({ \ + uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset; \ + *(typeof(val) *)addr = val; \ +}) + +#define OCP_RES_LOCATION(member) \ + .bp_offset = offsetof(struct ptp_ocp, member) + +#define OCP_MEM_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem + +#define OCP_SERIAL_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial + +#define OCP_I2C_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c + +#define OCP_SPI_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi + +#define OCP_EXT_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext + +/* This is the MSI vector mapping used. + * 0: N/C + * 1: TS0 + * 2: TS1 + * 3: GPS + * 4: GPS2 (n/c) + * 5: MAC + * 6: SPI IMU (inertial measurement unit) + * 7: I2C oscillator + * 8: HWICAP + * 9: SPI Flash + */ + +static struct ocp_resource ocp_fb_resource[] = { + { + OCP_MEM_RESOURCE(reg), + .offset = 0x01000000, .size = 0x10000, + }, + { + OCP_EXT_RESOURCE(ts0), + .offset = 0x01010000, .size = 0x10000, .irq_vec = 1, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts0", .index = 0, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts1), + .offset = 0x01020000, .size = 0x10000, .irq_vec = 2, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts1", .index = 1, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_MEM_RESOURCE(pps_monitor), + .offset = 0x01040000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(tod), + .offset = 0x01050000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(image), + .offset = 0x00020000, .size = 0x1000, + }, + { + OCP_I2C_RESOURCE(i2c_ctrl), + .offset = 0x00150000, .size = 0x10000, .irq_vec = 7, + }, + { + OCP_SERIAL_RESOURCE(gps_port), + .offset = 0x00160000 + 0x1000, .irq_vec = 3, + }, + { + OCP_SERIAL_RESOURCE(mac_port), + .offset = 0x00180000 + 0x1000, .irq_vec = 5, + }, + { + OCP_SPI_RESOURCE(spi_flash), + .offset = 0x00310000, .size = 0x10000, .irq_vec = 9, + .extra = &(struct ptp_ocp_flash_info) { + .name = "xilinx_spi", .pci_offset = 0, + .data_size = sizeof(struct xspi_platform_data), + .data = &(struct xspi_platform_data) { + .num_chipselect = 1, + .bits_per_word = 8, + .num_devices = 1, + .devices = &(struct spi_board_info) { + .modalias = "spi-nor", + }, + }, + }, + }, + { + .setup = ptp_ocp_fb_board_init, + }, + { } +}; + +static const struct pci_device_id ptp_ocp_pcidev_id[] = { + { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) }, + { 0 } +}; +MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); + +static 
DEFINE_MUTEX(ptp_ocp_lock); +static DEFINE_IDR(ptp_ocp_idr); + +static struct { + const char *name; + int value; +} ptp_ocp_clock[] = { + { .name = "NONE", .value = 0 }, + { .name = "TOD", .value = 1 }, + { .name = "IRIG", .value = 2 }, + { .name = "PPS", .value = 3 }, + { .name = "PTP", .value = 4 }, + { .name = "RTC", .value = 5 }, + { .name = "DCF", .value = 6 }, + { .name = "REGS", .value = 0xfe }, + { .name = "EXT", .value = 0xff }, }; +static const char * +ptp_ocp_clock_name_from_val(int val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) + if (ptp_ocp_clock[i].value == val) + return ptp_ocp_clock[i].name; + return NULL; +} + +static int +ptp_ocp_clock_val_from_name(const char *name) +{ + const char *clk; + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + if (!strncasecmp(name, clk, strlen(clk))) + return ptp_ocp_clock[i].value; + } + return -EINVAL; +} + static int __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts, struct ptp_system_timestamp *sts) @@ -192,6 +459,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) return -EOPNOTSUPP; } +static int +ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns) +{ + return -EOPNOTSUPP; +} + +static int +ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq, + int on) +{ + struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info); + struct ptp_ocp_ext_src *ext = NULL; + int err; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + ext = bp->ts0; + break; + case 1: + ext = bp->ts1; + break; + } + break; + case PTP_CLK_REQ_PPS: + ext = bp->pps; + break; + default: + return -EOPNOTSUPP; + } + + err = -ENXIO; + if (ext) + err = ext->info->enable(ext, on); + + return err; +} + static const struct ptp_clock_info ptp_ocp_clock_info = { .owner = THIS_MODULE, .name = KBUILD_MODNAME, @@ -200,10 +506,59 @@ static const struct ptp_clock_info ptp_ocp_clock_info = { .settime64 = ptp_ocp_settime, .adjtime = ptp_ocp_adjtime, .adjfine = ptp_ocp_null_adjfine, + .adjphase = ptp_ocp_adjphase, + .enable = ptp_ocp_enable, + .pps = true, + .n_ext_ts = 2, }; +static void +__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp) +{ + u32 ctrl, select; + + select = ioread32(&bp->reg->select); + iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select); + + iowrite32(0, &bp->reg->drift_ns); + + ctrl = ioread32(&bp->reg->ctrl); + ctrl |= OCP_CTRL_ADJUST_DRIFT; + iowrite32(ctrl, &bp->reg->ctrl); + + /* restore clock selection */ + iowrite32(select >> 16, &bp->reg->select); +} + +static void +ptp_ocp_watchdog(struct timer_list *t) +{ + struct ptp_ocp *bp = from_timer(bp, t, watchdog); + unsigned long flags; + u32 status; + + status = ioread32(&bp->pps_monitor->status); + + if (status & PPS_STATUS_SUPERV_ERR) { + iowrite32(status, &bp->pps_monitor->status); + if (!bp->gps_lost) { + spin_lock_irqsave(&bp->lock, flags); + __ptp_ocp_clear_drift_locked(bp); + spin_unlock_irqrestore(&bp->lock, flags); + bp->gps_lost = ktime_get_real_seconds(); + ptp_ocp_health_update(bp); + } + + } else if (bp->gps_lost) { + bp->gps_lost = 0; + ptp_ocp_health_update(bp); + } + + mod_timer(&bp->watchdog, jiffies + HZ); +} + static int -ptp_ocp_check_clock(struct ptp_ocp *bp) +ptp_ocp_init_clock(struct ptp_ocp *bp) { struct timespec64 ts; bool sync; @@ -214,6 +569,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ctrl |= OCP_CTRL_ENABLE; iowrite32(ctrl, &bp->reg->ctrl); + /* NO DRIFT Correction */ + /* offset_p:i 1/8, offset_i: 1/16, 
drift_p: 0, drift_i: 0 */ + iowrite32(0x2000, &bp->reg->servo_offset_p); + iowrite32(0x1000, &bp->reg->servo_offset_i); + iowrite32(0, &bp->reg->servo_drift_p); + iowrite32(0, &bp->reg->servo_drift_i); + + /* latch servo values */ + ctrl |= OCP_CTRL_ADJUST_SERVO; + iowrite32(ctrl, &bp->reg->ctrl); + if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) { dev_err(&bp->pdev->dev, "clock not enabled\n"); return -ENODEV; @@ -229,6 +595,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ts.tv_sec, ts.tv_nsec, sync ? "in-sync" : "UNSYNCED"); + timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0); + mod_timer(&bp->watchdog, jiffies + HZ); + return 0; } @@ -278,82 +647,942 @@ ptp_ocp_tod_info(struct ptp_ocp *bp) reg & TOD_STATUS_LEAP_VALID ? 1 : 0); } +static int +ptp_ocp_firstchild(struct device *dev, void *data) +{ + return 1; +} + +static int +ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data) +{ + struct i2c_msg msgs[2] = { + { + .addr = addr, + .len = 1, + .buf = &reg, + }, + { + .addr = addr, + .flags = I2C_M_RD, + .len = 2, + .buf = data, + }, + }; + int err; + u8 len; + + /* xiic-i2c for some stupid reason only does 2 byte reads. */ + while (sz) { + len = min_t(u8, sz, 2); + msgs[1].len = len; + err = i2c_transfer(adap, msgs, 2); + if (err != msgs[1].len) + return err; + msgs[1].buf += len; + reg += len; + sz -= len; + } + return 0; +} + +static void +ptp_ocp_get_serial_number(struct ptp_ocp *bp) +{ + struct i2c_adapter *adap; + struct device *dev; + int err; + + dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find I2C adapter\n"); + return; + } + + adap = i2c_verify_adapter(dev); + if (!adap) { + dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n", + dev_name(dev)); + goto out; + } + + err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial); + if (err) { + dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err); + goto out; + } + + bp->has_serial = true; + +out: + put_device(dev); + } + static void ptp_ocp_info(struct ptp_ocp *bp) { - static const char * const clock_name[] = { - "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT" - }; u32 version, select; version = ioread32(&bp->reg->version); select = ioread32(&bp->reg->select); dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n", version >> 24, (version >> 16) & 0xff, version & 0xffff, - clock_name[select & 7], + ptp_ocp_clock_name_from_val(select >> 16), ptp_clock_index(bp->ptp)); ptp_ocp_tod_info(bp); } +static const struct devlink_param ptp_ocp_devlink_params[] = { +}; + +static void +ptp_ocp_devlink_set_params_init_values(struct devlink *devlink) +{ +} + static int -ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +ptp_ocp_devlink_register(struct devlink *devlink, struct device *dev) { - struct ptp_ocp *bp; int err; - bp = kzalloc(sizeof(*bp), GFP_KERNEL); - if (!bp) + err = devlink_register(devlink, dev); + if (err) + return err; + + err = devlink_params_register(devlink, ptp_ocp_devlink_params, + ARRAY_SIZE(ptp_ocp_devlink_params)); + ptp_ocp_devlink_set_params_init_values(devlink); + if (err) + goto out; + devlink_params_publish(devlink); + + return 0; + +out: + devlink_unregister(devlink); + return err; +} + +static void +ptp_ocp_devlink_unregister(struct devlink *devlink) +{ + devlink_params_unregister(devlink, ptp_ocp_devlink_params, + ARRAY_SIZE(ptp_ocp_devlink_params)); + devlink_unregister(devlink); +} + +static struct device * +ptp_ocp_find_flash(struct ptp_ocp *bp) +{ + struct device
*dev, *last; + + last = NULL; + dev = &bp->spi_flash->dev; + + while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) { + if (!strcmp("mtd", dev_bus_name(dev))) + break; + put_device(last); + last = dev; + } + put_device(last); + + return dev; +} + +static int +ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev, + const struct firmware *fw) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + struct ptp_ocp *bp = devlink_priv(devlink); + size_t off, len, resid, wrote; + struct erase_info erase; + size_t base, blksz; + int err; + + off = 0; + base = bp->flash_start; + blksz = 4096; + resid = fw->size; + + while (resid) { + devlink_flash_update_status_notify(devlink, "Flashing", + NULL, off, fw->size); + + len = min_t(size_t, resid, blksz); + erase.addr = base + off; + erase.len = blksz; + + err = mtd_erase(mtd, &erase); + if (err) + goto out; + + err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]); + if (err) + goto out; + + off += blksz; + resid -= len; + } +out: + return err; +} + +static int +ptp_ocp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + struct device *dev; + const char *msg; + int err; + + dev = ptp_ocp_find_flash(bp); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n"); + return -ENODEV; + } + + devlink_flash_update_status_notify(devlink, "Preparing to flash", + NULL, 0, 0); + + err = ptp_ocp_devlink_flash(devlink, dev, params->fw); + + msg = err ? "Flash error" : "Flash complete"; + devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0); + + bp->pending_image = true; + + put_device(dev); + return err; +} + +static int +ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + char buf[32]; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + if (bp->pending_image) { + err = devlink_info_version_stored_put(req, + "timecard", "pending"); + if (err) + return err; + } + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + if (ver & 0xffff) { + sprintf(buf, "%d", ver); + err = devlink_info_version_running_put(req, + "timecard", + buf); + } else { + sprintf(buf, "%d", ver >> 16); + err = devlink_info_version_running_put(req, + "golden flash", + buf); + } + if (err) + return err; + } + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + if (bp->has_serial) { + sprintf(buf, "%pM", bp->serial); + err = devlink_info_serial_number_put(req, buf); + if (err) + return err; + } + + return 0; +} + +static const struct devlink_ops ptp_ocp_devlink_ops = { + .flash_update = ptp_ocp_devlink_flash_update, + .info_get = ptp_ocp_devlink_info_get, +}; + +static int +ptp_ocp_health_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_health_reporter_priv(reporter); + char buf[32]; + int err; + + if (!bp->gps_lost) + return 0; + + sprintf(buf, "%ptT", &bp->gps_lost); + err = devlink_fmsg_string_pair_put(fmsg, "Lost sync at", buf); + if (err) + return err; + + return 0; +} + +static void +ptp_ocp_health_update(struct ptp_ocp *bp) +{ + int state; + + state = bp->gps_lost ? 
DEVLINK_HEALTH_REPORTER_STATE_ERROR + : DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + + if (bp->gps_lost) + devlink_health_report(bp->health, "No GPS signal", NULL); + + devlink_health_reporter_state_update(bp->health, state); +} + +static const struct devlink_health_reporter_ops ptp_ocp_health_ops = { + .name = "gps_sync", + .diagnose = ptp_ocp_health_diagnose, +}; + +static void +ptp_ocp_devlink_health_register(struct devlink *devlink) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + struct devlink_health_reporter *r; + + r = devlink_health_reporter_create(devlink, &ptp_ocp_health_ops, 0, bp); + if (IS_ERR(r)) + dev_err(&bp->pdev->dev, "Failed to create reporter, err %ld\n", + PTR_ERR(r)); + bp->health = r; +} + +static void __iomem * +__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size) +{ + struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp"); + + return devm_ioremap_resource(&bp->pdev->dev, &res); +} + +static void __iomem * +ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + unsigned long start; + + start = pci_resource_start(bp->pdev, 0) + r->offset; + return __ptp_ocp_get_mem(bp, start, r->size); +} + +static void +ptp_ocp_set_irq_resource(struct resource *res, int irq) +{ + struct resource r = DEFINE_RES_IRQ(irq); + *res = r; +} + +static void +ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size) +{ + struct resource r = DEFINE_RES_MEM(start, size); + *res = r; +} + +static int +ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct ptp_ocp_flash_info *info; + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct resource res[2]; + unsigned long start; + int id; + + /* XXX hack to work around old FPGA */ + if (bp->n_irqs < 10) { + dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n"); + return 0; + } + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "spi device irq %d out of range\n", + r->irq_vec); + return 0; + } + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + info = r->extra; + id = pci_dev_id(pdev) << 1; + id += info->pci_offset; + + p = platform_device_register_resndata(&pdev->dev, info->name, id, + res, 2, info->data, + info->data_size); + if (IS_ERR(p)) + return PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static struct platform_device * +ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id) +{ + struct resource res[2]; + unsigned long start; + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + return platform_device_register_resndata(&pdev->dev, "xiic-i2c", + id, res, 2, NULL, 0); +} + +static int +ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct clk_hw *clk; + char buf[32]; + int id; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n", + r->irq_vec); + return 0; + } + + id = pci_dev_id(bp->pdev); + + sprintf(buf, "AXI.%d", id); + clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000); + if (IS_ERR(clk)) + return PTR_ERR(clk); + bp->i2c_clk = clk; + + sprintf(buf, "xiic-i2c.%d", id); + devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf); + p = ptp_ocp_i2c_bus(bp->pdev, r, id); + if (IS_ERR(p)) + return 
PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static irqreturn_t +ptp_ocp_ts_irq(int irq, void *priv) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + struct ptp_clock_event ev; + u32 sec, nsec; + + /* XXX should fix API - this converts s/ns -> ts -> s/ns */ + sec = ioread32(&reg->time_sec); + nsec = ioread32(&reg->time_ns); + + ev.type = PTP_CLOCK_EXTTS; + ev.index = ext->info->index; + ev.timestamp = sec * 1000000000ULL + nsec; + + ptp_clock_event(ext->bp->ptp, &ev); + + iowrite32(1, &reg->intr); /* write 1 to ack */ + + return IRQ_HANDLED; +} + +static int +ptp_ocp_ts_enable(void *priv, bool enable) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + + if (enable) { + iowrite32(1, &reg->enable); + iowrite32(1, &reg->intr_mask); + iowrite32(1, &reg->intr); + } else { + iowrite32(0, &reg->intr_mask); + iowrite32(0, &reg->enable); + } + + return 0; +} + +static void +ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext) +{ + ext->info->enable(ext, false); + pci_free_irq(ext->bp->pdev, ext->irq_vec, ext); + kfree(ext); +} + +static int +ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct ptp_ocp_ext_src *ext; + int err; + + ext = kzalloc(sizeof(*ext), GFP_KERNEL); + if (!ext) return -ENOMEM; - bp->pdev = pdev; - pci_set_drvdata(pdev, bp); - err = pci_enable_device(pdev); + err = -EINVAL; + ext->mem = ptp_ocp_get_mem(bp, r); + if (!ext->mem) + goto out; + + ext->bp = bp; + ext->info = r->extra; + ext->irq_vec = r->irq_vec; + + err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL, + ext, "ocp%d.%s", bp->id, ext->info->name); if (err) { - dev_err(&pdev->dev, "pci_enable_device\n"); - goto out_free; + dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec); + goto out; } - err = pci_request_regions(pdev, KBUILD_MODNAME); - if (err) { - dev_err(&pdev->dev, "pci_request_region\n"); - goto out_disable; + bp_assign_entry(bp, r, ext); + + return 0; + +out: + kfree(ext); + return err; +} + +static int +ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct uart_8250_port uart; + + /* Setting UPF_IOREMAP and leaving port.membase unspecified lets + * the serial port device claim and release the pci resource. + */ + memset(&uart, 0, sizeof(uart)); + uart.port.dev = &pdev->dev; + uart.port.iotype = UPIO_MEM; + uart.port.regshift = 2; + uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset; + uart.port.irq = pci_irq_vector(pdev, r->irq_vec); + uart.port.uartclk = 50000000; + uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP; + uart.port.type = PORT_16550A; + + return serial8250_register_8250_port(&uart); +} + +static int +ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r) +{ + int port; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "serial device irq %d out of range\n", + r->irq_vec); + return 0; } - bp->base = pci_ioremap_bar(pdev, 0); - if (!bp->base) { - dev_err(&pdev->dev, "io_remap bar0\n"); - err = -ENOMEM; - goto out_release_regions; + port = ptp_ocp_serial_line(bp, r); + if (port < 0) + return port; + + bp_assign_entry(bp, r, port); + + return 0; +} + +static int +ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + void __iomem *mem; + + mem = ptp_ocp_get_mem(bp, r); + if (!mem) + return -EINVAL; + + bp_assign_entry(bp, r, mem); + + return 0; +} + +/* FB specific board initializers; last "resource" registered.
*/ +static int +ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) +{ + bp->flash_start = 1024 * 4096; + + return ptp_ocp_init_clock(bp); +} + +static int +ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data) +{ + struct ocp_resource *r, *table; + int err = 0; + + table = (struct ocp_resource *)driver_data; + for (r = table; r->setup; r++) { + err = r->setup(bp, r); + if (err) + break; } - bp->reg = bp->base + OCP_REGISTER_OFFSET; - bp->tod = bp->base + TOD_REGISTER_OFFSET; + return err; +} + +static ssize_t +serialnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + return sysfs_emit(buf, "%pM\n", bp->serial); +} +static DEVICE_ATTR_RO(serialnum); + +static ssize_t +gps_sync_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + ssize_t ret; + + if (bp->gps_lost) + ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gps_lost); + else + ret = sysfs_emit(buf, "SYNC\n"); + + return ret; +} +static DEVICE_ATTR_RO(gps_sync); + +static ssize_t +clock_source_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + const char *p; + u32 select; + + select = ioread32(&bp->reg->select); + p = ptp_ocp_clock_name_from_val(select >> 16); + + return sysfs_emit(buf, "%s\n", p); +} + +static ssize_t +clock_source_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + unsigned long flags; + int val; + + val = ptp_ocp_clock_val_from_name(buf); + if (val < 0) + return val; + + spin_lock_irqsave(&bp->lock, flags); + iowrite32(val, &bp->reg->select); + spin_unlock_irqrestore(&bp->lock, flags); + + return count; +} +static DEVICE_ATTR_RW(clock_source); + +static ssize_t +available_clock_sources_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *clk; + ssize_t count; + int i; + + count = 0; + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + count += sysfs_emit_at(buf, count, "%s ", clk); + } + if (count) + count--; + count += sysfs_emit_at(buf, count, "\n"); + return count; +} +static DEVICE_ATTR_RO(available_clock_sources); + +static struct attribute *timecard_attrs[] = { + &dev_attr_serialnum.attr, + &dev_attr_gps_sync.attr, + &dev_attr_clock_source.attr, + &dev_attr_available_clock_sources.attr, + NULL, +}; +ATTRIBUTE_GROUPS(timecard); + +static void +ptp_ocp_dev_release(struct device *dev) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + mutex_lock(&ptp_ocp_lock); + idr_remove(&ptp_ocp_idr, bp->id); + mutex_unlock(&ptp_ocp_lock); +} + +static int +ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) +{ + int err; + + mutex_lock(&ptp_ocp_lock); + err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL); + mutex_unlock(&ptp_ocp_lock); + if (err < 0) { + dev_err(&pdev->dev, "idr_alloc failed: %d\n", err); + return err; + } + bp->id = err; + bp->ptp_info = ptp_ocp_clock_info; spin_lock_init(&bp->lock); + bp->gps_port = -1; + bp->mac_port = -1; + bp->pdev = pdev; + + device_initialize(&bp->dev); + dev_set_name(&bp->dev, "ocp%d", bp->id); + bp->dev.class = &timecard_class; + bp->dev.parent = &pdev->dev; + bp->dev.release = ptp_ocp_dev_release; + dev_set_drvdata(&bp->dev, bp); + + err = device_add(&bp->dev); + if (err) { + dev_err(&bp->dev, "device add failed: %d\n", err); + 
put_device(&bp->dev); + goto out; + } + + pci_set_drvdata(pdev, bp); + + return 0; + +out: + ptp_ocp_dev_release(&bp->dev); + return err; +} + +static void +ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link) +{ + struct device *dev = &bp->dev; + + if (sysfs_create_link(&dev->kobj, &child->kobj, link)) + dev_err(dev, "%s symlink failed\n", link); +} + +static void +ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link) +{ + struct device *dev, *child; + + dev = &bp->pdev->dev; + + child = device_find_child_by_name(dev, name); + if (!child) { + dev_err(dev, "Could not find device %s\n", name); + return; + } + + ptp_ocp_symlink(bp, child, link); + put_device(child); +} + +static int +ptp_ocp_complete(struct ptp_ocp *bp) +{ + struct pps_device *pps; + char buf[32]; + + if (bp->gps_port != -1) { + sprintf(buf, "ttyS%d", bp->gps_port); + ptp_ocp_link_child(bp, buf, "ttyGPS"); + } + if (bp->mac_port != -1) { + sprintf(buf, "ttyS%d", bp->mac_port); + ptp_ocp_link_child(bp, buf, "ttyMAC"); + } + sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); + ptp_ocp_link_child(bp, buf, "ptp"); + + pps = pps_lookup_dev(bp->ptp); + if (pps) + ptp_ocp_symlink(bp, pps->dev, "pps"); + + if (device_add_groups(&bp->dev, timecard_groups)) + pr_err("device add groups failed\n"); + + return 0; +} + +static void +ptp_ocp_resource_summary(struct ptp_ocp *bp) +{ + struct device *dev = &bp->pdev->dev; + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + dev_info(dev, "version %x\n", ver); + if (ver & 0xffff) + dev_info(dev, "regular image, version %d\n", + ver & 0xffff); + else + dev_info(dev, "golden image, version %d\n", + ver >> 16); + } + if (bp->gps_port != -1) + dev_info(dev, "GPS @ /dev/ttyS%d 115200\n", bp->gps_port); + if (bp->mac_port != -1) + dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port); +} - err = ptp_ocp_check_clock(bp); +static void +ptp_ocp_detach_sysfs(struct ptp_ocp *bp) +{ + struct device *dev = &bp->dev; + + sysfs_remove_link(&dev->kobj, "ttyGPS"); + sysfs_remove_link(&dev->kobj, "ttyMAC"); + sysfs_remove_link(&dev->kobj, "ptp"); + sysfs_remove_link(&dev->kobj, "pps"); + device_remove_groups(dev, timecard_groups); +} + +static void +ptp_ocp_detach(struct ptp_ocp *bp) +{ + ptp_ocp_detach_sysfs(bp); + if (timer_pending(&bp->watchdog)) + del_timer_sync(&bp->watchdog); + if (bp->ts0) + ptp_ocp_unregister_ext(bp->ts0); + if (bp->ts1) + ptp_ocp_unregister_ext(bp->ts1); + if (bp->pps) + ptp_ocp_unregister_ext(bp->pps); + if (bp->gps_port != -1) + serial8250_unregister_port(bp->gps_port); + if (bp->mac_port != -1) + serial8250_unregister_port(bp->mac_port); + if (bp->spi_flash) + platform_device_unregister(bp->spi_flash); + if (bp->i2c_ctrl) + platform_device_unregister(bp->i2c_ctrl); + if (bp->i2c_clk) + clk_hw_unregister_fixed_rate(bp->i2c_clk); + if (bp->n_irqs) + pci_free_irq_vectors(bp->pdev); + if (bp->ptp) + ptp_clock_unregister(bp->ptp); + if (bp->health) + devlink_health_reporter_destroy(bp->health); + device_unregister(&bp->dev); +} + +static int +ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct devlink *devlink; + struct ptp_ocp *bp; + int err; + + devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp)); + if (!devlink) { + dev_err(&pdev->dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + + err = ptp_ocp_devlink_register(devlink, &pdev->dev); + if (err) + goto out_free; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device\n"); + goto out_unregister; + } + + 
bp = devlink_priv(devlink); + err = ptp_ocp_device_init(bp, pdev); + if (err) + goto out_unregister; + + /* compat mode. + * Older FPGA firmware only returns 2 irq's. + * allow this - if not all of the IRQ's are returned, skip the + * extra devices and just register the clock. + */ + err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (err < 0) { + dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err); + goto out; + } + bp->n_irqs = err; + pci_set_master(pdev); + + err = ptp_ocp_register_resources(bp, id->driver_data); if (err) goto out; bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev); if (IS_ERR(bp->ptp)) { - dev_err(&pdev->dev, "ptp_clock_register\n"); err = PTR_ERR(bp->ptp); + dev_err(&pdev->dev, "ptp_clock_register: %d\n", err); + bp->ptp = NULL; goto out; } + err = ptp_ocp_complete(bp); + if (err) + goto out; + ptp_ocp_info(bp); + ptp_ocp_resource_summary(bp); + ptp_ocp_devlink_health_register(devlink); return 0; out: - pci_iounmap(pdev, bp->base); -out_release_regions: - pci_release_regions(pdev); -out_disable: + ptp_ocp_detach(bp); pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +out_unregister: + ptp_ocp_devlink_unregister(devlink); out_free: - kfree(bp); + devlink_free(devlink); return err; } @@ -362,13 +1591,14 @@ static void ptp_ocp_remove(struct pci_dev *pdev) { struct ptp_ocp *bp = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(bp); - ptp_clock_unregister(bp->ptp); - pci_iounmap(pdev, bp->base); - pci_release_regions(pdev); + ptp_ocp_detach(bp); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); - kfree(bp); + + ptp_ocp_devlink_unregister(devlink); + devlink_free(devlink); } static struct pci_driver ptp_ocp_driver = { @@ -378,19 +1608,84 @@ static struct pci_driver ptp_ocp_driver = { .remove = ptp_ocp_remove, }; +static int +ptp_ocp_i2c_notifier_call(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev, *child = data; + struct ptp_ocp *bp; + bool add; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + case BUS_NOTIFY_DEL_DEVICE: + add = action == BUS_NOTIFY_ADD_DEVICE; + break; + default: + return 0; + } + + if (!i2c_verify_adapter(child)) + return 0; + + dev = child; + while ((dev = dev->parent)) + if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME)) + goto found; + return 0; + +found: + bp = dev_get_drvdata(dev); + if (add) + ptp_ocp_symlink(bp, child, "i2c"); + else + sysfs_remove_link(&bp->dev.kobj, "i2c"); + + return 0; +} + +static struct notifier_block ptp_ocp_i2c_notifier = { + .notifier_call = ptp_ocp_i2c_notifier_call, +}; + static int __init ptp_ocp_init(void) { + const char *what; int err; + what = "timecard class"; + err = class_register(&timecard_class); + if (err) + goto out; + + what = "i2c notifier"; + err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); + if (err) + goto out_notifier; + + what = "ptp_ocp driver"; err = pci_register_driver(&ptp_ocp_driver); + if (err) + goto out_register; + + return 0; + +out_register: + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); +out_notifier: + class_unregister(&timecard_class); +out: + pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err); return err; } static void __exit ptp_ocp_fini(void) { + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); pci_unregister_driver(&ptp_ocp_driver); + class_unregister(&timecard_class); } module_init(ptp_ocp_init); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 9748165e08e9..acbe76a76fb2 
100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -504,28 +504,6 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) EXPORT_SYMBOL(ccwgroup_driver_unregister); /** - * get_ccwgroupdev_by_busid() - obtain device from a bus id - * @gdrv: driver the device is owned by - * @bus_id: bus id of the device to be searched - * - * This function searches all devices owned by @gdrv for a device with a bus - * id matching @bus_id. - * Returns: - * If a match is found, its reference count of the found device is increased - * and it is returned; else %NULL is returned. - */ -struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, - char *bus_id) -{ - struct device *dev; - - dev = driver_find_device_by_name(&gdrv->driver, bus_id); - - return dev ? to_ccwgroupdev(dev) : NULL; -} -EXPORT_SYMBOL_GPL(get_ccwgroupdev_by_busid); - -/** * ccwgroup_probe_ccwdev() - probe function for slave devices * @cdev: ccw device to be probed * diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index bf236d474538..cff91b4f1a76 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -88,15 +88,6 @@ config QETH_L3 To compile as a module choose M. The module name is qeth_l3. If unsure, choose Y. -config QETH_OSN - def_bool !HAVE_MARCH_Z14_FEATURES - prompt "qeth OSN device support" - depends on QETH - help - This enables the qeth driver to support devices in OSN mode. - This feature will be removed in 2021. - If unsure, choose N. - config QETH_OSX def_bool !HAVE_MARCH_Z15_FEATURES prompt "qeth OSX device support" diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f4d554ea0c93..535a60b3946d 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -259,22 +259,10 @@ struct qeth_hdr_layer2 { __u8 reserved2[16]; } __attribute__ ((packed)); -struct qeth_hdr_osn { - __u8 id; - __u8 reserved; - __u16 seq_no; - __u16 reserved2; - __u16 control_flags; - __u16 pdu_length; - __u8 reserved3[18]; - __u32 ccid; -} __attribute__ ((packed)); - struct qeth_hdr { union { struct qeth_hdr_layer2 l2; struct qeth_hdr_layer3 l3; - struct qeth_hdr_osn osn; } hdr; } __attribute__ ((packed)); @@ -341,7 +329,6 @@ enum qeth_header_ids { QETH_HEADER_TYPE_LAYER3 = 0x01, QETH_HEADER_TYPE_LAYER2 = 0x02, QETH_HEADER_TYPE_L3_TSO = 0x03, - QETH_HEADER_TYPE_OSN = 0x04, QETH_HEADER_TYPE_L2_TSO = 0x06, QETH_HEADER_MASK_INVAL = 0x80, }; @@ -779,18 +766,13 @@ enum qeth_threads { QETH_RECOVER_THREAD = 1, }; -struct qeth_osn_info { - int (*assist_cb)(struct net_device *dev, void *data); - int (*data_cb)(struct sk_buff *skb); -}; - struct qeth_discipline { - const struct device_type *devtype; int (*setup) (struct ccwgroup_device *); void (*remove) (struct ccwgroup_device *); int (*set_online)(struct qeth_card *card, bool carrier_ok); void (*set_offline)(struct qeth_card *card); - int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd); + int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); int (*control_event_handler)(struct qeth_card *card, struct qeth_ipa_cmd *cmd); }; @@ -865,7 +847,6 @@ struct qeth_card { /* QDIO buffer handling */ struct qeth_qdio_info qdio; int read_or_write_problem; - struct qeth_osn_info osn_info; const struct qeth_discipline *discipline; atomic_t force_alloc_skb; struct service_level qeth_service_level; @@ -1058,10 +1039,7 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb); extern const struct qeth_discipline qeth_l2_discipline; 
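The qeth_discipline hunk above widens do_ioctl() to receive the user buffer as an explicit void __user *data argument instead of digging it out of rq->ifr_ifru.ifru_data, matching the kernel's ndo_siocdevprivate convention for device-private ioctls. A minimal sketch of a handler in that style; DEMO_PRIVATE_CMD and struct demo_req are invented for illustration:

#include <linux/netdevice.h>
#include <linux/sockios.h>
#include <linux/uaccess.h>

#define DEMO_PRIVATE_CMD	(SIOCDEVPRIVATE + 1)	/* hypothetical subcommand */

struct demo_req {
	u32 value;		/* hypothetical payload */
};

static int demo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
			       void __user *data, int cmd)
{
	struct demo_req req;

	if (cmd != DEMO_PRIVATE_CMD)
		return -EOPNOTSUPP;

	/* The user pointer arrives as a direct argument; no ifr_ifru
	 * poking and no 32/64-bit compat translation in the driver. */
	if (copy_from_user(&req, data, sizeof(req)))
		return -EFAULT;
	req.value |= 1;
	return copy_to_user(data, &req, sizeof(req)) ? -EFAULT : 0;
}

Passing data separately is what lets compat user space work without per-driver ifreq fixups, which is why the qeth series splits qeth_siocdevprivate() out of qeth_do_ioctl().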
extern const struct qeth_discipline qeth_l3_discipline; extern const struct ethtool_ops qeth_ethtool_ops; -extern const struct ethtool_ops qeth_osn_ethtool_ops; extern const struct attribute_group *qeth_dev_groups[]; -extern const struct attribute_group *qeth_osn_dev_groups[]; -extern const struct device_type qeth_generic_devtype; const char *qeth_get_cardname_short(struct qeth_card *); int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count); @@ -1069,11 +1047,9 @@ int qeth_setup_discipline(struct qeth_card *card, enum qeth_discipline_id disc); void qeth_remove_discipline(struct qeth_card *card); /* exports for qeth discipline device drivers */ -extern struct kmem_cache *qeth_core_header_cache; extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; struct net_device *qeth_clone_netdev(struct net_device *orig); -struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); @@ -1088,9 +1064,6 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card, enum qeth_ipa_cmds cmd_code, enum qeth_prot_versions prot, unsigned int data_length); -struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, - unsigned int length, unsigned int ccws, - long timeout); struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card, enum qeth_ipa_funcs ipa_func, u16 cmd_code, @@ -1099,18 +1072,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card, struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card, enum qeth_diags_cmds sub_cmd, unsigned int data_length); -void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason); -void qeth_put_cmd(struct qeth_cmd_buffer *iob); int qeth_schedule_recovery(struct qeth_card *card); int qeth_poll(struct napi_struct *napi, int budget); void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable); int qeth_setadpparms_change_macaddr(struct qeth_card *); void qeth_tx_timeout(struct net_device *, unsigned int txqueue); -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, - u16 cmd_length, - bool (*match)(struct qeth_cmd_buffer *iob, - struct qeth_cmd_buffer *reply)); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); int qeth_query_card_info(struct qeth_card *card, @@ -1118,12 +1085,9 @@ int qeth_query_card_info(struct qeth_card *card, int qeth_setadpparms_set_access_ctrl(struct qeth_card *card, enum qeth_ipa_isolation_modes mode); -unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset); -int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, - struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int offset, unsigned int hd_len, - int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...); int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); @@ -1148,11 +1112,4 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_hdr *hdr, struct sk_buff *skb, __be16 proto, unsigned int data_len)); -/* exports for OSN */ -int qeth_osn_assist(struct net_device *, void *, int); -int qeth_osn_register(unsigned char *read_dev_no, struct net_device **, - int 
(*assist_cb)(struct net_device *, void *), - int (*data_cb)(struct sk_buff *)); -void qeth_osn_deregister(struct net_device *); - #endif /* __QETH_CORE_H__ */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 62f88ccbd03f..5b973f377504 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -57,8 +57,7 @@ struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = { }; EXPORT_SYMBOL_GPL(qeth_dbf); -struct kmem_cache *qeth_core_header_cache; -EXPORT_SYMBOL_GPL(qeth_core_header_cache); +static struct kmem_cache *qeth_core_header_cache; static struct kmem_cache *qeth_qdio_outbuf_cache; static struct device *qeth_core_root_dev; @@ -101,8 +100,6 @@ static const char *qeth_get_cardname(struct qeth_card *card) return " OSD Express"; case QETH_CARD_TYPE_IQD: return " HiperSockets"; - case QETH_CARD_TYPE_OSN: - return " OSN QDIO"; case QETH_CARD_TYPE_OSM: return " OSM QDIO"; case QETH_CARD_TYPE_OSX: @@ -157,8 +154,6 @@ const char *qeth_get_cardname_short(struct qeth_card *card) } case QETH_CARD_TYPE_IQD: return "HiperSockets"; - case QETH_CARD_TYPE_OSN: - return "OSN"; case QETH_CARD_TYPE_OSM: return "OSM_1000"; case QETH_CARD_TYPE_OSX: @@ -431,6 +426,13 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, return n; } +static void qeth_put_cmd(struct qeth_cmd_buffer *iob) +{ + if (refcount_dec_and_test(&iob->ref_count)) { + kfree(iob->data); + kfree(iob); + } +} static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, void *data) { @@ -499,12 +501,11 @@ static void qeth_dequeue_cmd(struct qeth_card *card, spin_unlock_irq(&card->lock); } -void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason) +static void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason) { iob->rc = reason; complete(&iob->done); } -EXPORT_SYMBOL_GPL(qeth_notify_cmd); static void qeth_flush_local_addrs4(struct qeth_card *card) { @@ -781,10 +782,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, QETH_CARD_TEXT(card, 5, "chkipad"); if (IS_IPA_REPLY(cmd)) { - if (cmd->hdr.command != IPA_CMD_SETCCID && - cmd->hdr.command != IPA_CMD_DELCCID && - cmd->hdr.command != IPA_CMD_MODCCID && - cmd->hdr.command != IPA_CMD_SET_DIAG_ASS) + if (cmd->hdr.command != IPA_CMD_SET_DIAG_ASS) qeth_issue_ipa_msg(cmd, cmd->hdr.return_code, card); return cmd; } @@ -819,8 +817,6 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, if (card->discipline->control_event_handler(card, cmd)) return cmd; return NULL; - case IPA_CMD_MODCCID: - return cmd; case IPA_CMD_REGISTER_LOCAL_ADDR: if (cmd->hdr.prot_version == QETH_PROT_IPV4) qeth_add_local_addrs4(card, &cmd->data.local_addrs4); @@ -877,15 +873,6 @@ static int qeth_check_idx_response(struct qeth_card *card, return 0; } -void qeth_put_cmd(struct qeth_cmd_buffer *iob) -{ - if (refcount_dec_and_test(&iob->ref_count)) { - kfree(iob->data); - kfree(iob); - } -} -EXPORT_SYMBOL_GPL(qeth_put_cmd); - static void qeth_release_buffer_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob, unsigned int data_length) @@ -899,9 +886,9 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc) qeth_put_cmd(iob); } -struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, - unsigned int length, unsigned int ccws, - long timeout) +static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, + unsigned int length, + unsigned int ccws, long timeout) { struct qeth_cmd_buffer *iob; @@ -927,7 +914,6 @@ struct qeth_cmd_buffer 
*qeth_alloc_cmd(struct qeth_channel *channel, iob->length = length; return iob; } -EXPORT_SYMBOL_GPL(qeth_alloc_cmd); static void qeth_issue_next_read_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob, @@ -958,11 +944,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, cmd = qeth_check_ipa_data(card, cmd); if (!cmd) goto out; - if (IS_OSN(card) && card->osn_info.assist_cb && - cmd->hdr.command != IPA_CMD_STARTLAN) { - card->osn_info.assist_cb(card->dev, cmd); - goto out; - } } /* match against pending cmd requests */ @@ -1835,7 +1816,7 @@ static enum qeth_discipline_id qeth_enforce_discipline(struct qeth_card *card) { enum qeth_discipline_id disc = QETH_DISCIPLINE_UNDETERMINED; - if (IS_OSM(card) || IS_OSN(card)) + if (IS_OSM(card)) disc = QETH_DISCIPLINE_LAYER2; else if (IS_VM_NIC(card)) disc = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : @@ -1885,7 +1866,6 @@ static void qeth_idx_init(struct qeth_card *card) card->info.func_level = QETH_IDX_FUNC_LEVEL_IQD; break; case QETH_CARD_TYPE_OSD: - case QETH_CARD_TYPE_OSN: card->info.func_level = QETH_IDX_FUNC_LEVEL_OSD; break; default: @@ -2442,9 +2422,7 @@ static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply, static u8 qeth_mpc_select_prot_type(struct qeth_card *card) { - if (IS_OSN(card)) - return QETH_PROT_OSN2; - return IS_LAYER2(card) ? QETH_PROT_LAYER2 : QETH_PROT_TCPIP; + return IS_LAYER2(card) ? QETH_MPC_PROT_L2 : QETH_MPC_PROT_L3; } static int qeth_ulp_enable(struct qeth_card *card) @@ -3000,10 +2978,8 @@ static void qeth_ipa_finalize_cmd(struct qeth_card *card, __ipa_cmd(iob)->hdr.seqno = card->seqno.ipa++; } -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, - u16 cmd_length, - bool (*match)(struct qeth_cmd_buffer *iob, - struct qeth_cmd_buffer *reply)) +static void qeth_prepare_ipa_cmd(struct qeth_card *card, + struct qeth_cmd_buffer *iob, u16 cmd_length) { u8 prot_type = qeth_mpc_select_prot_type(card); u16 total_length = iob->length; @@ -3011,7 +2987,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, total_length, iob->data); iob->finalize = qeth_ipa_finalize_cmd; - iob->match = match; memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE); memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2); @@ -3022,7 +2997,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH); memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2); } -EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd); static bool qeth_ipa_match_reply(struct qeth_cmd_buffer *iob, struct qeth_cmd_buffer *reply) @@ -3046,7 +3020,8 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card, if (!iob) return NULL; - qeth_prepare_ipa_cmd(card, iob, data_length, qeth_ipa_match_reply); + qeth_prepare_ipa_cmd(card, iob, data_length); + iob->match = qeth_ipa_match_reply; hdr = &__ipa_cmd(iob)->hdr; hdr->command = cmd_code; @@ -3894,7 +3869,8 @@ static int qeth_get_elements_for_frags(struct sk_buff *skb) * Returns the number of pages, and thus QDIO buffer elements, needed to map the * skb's data (both its linear part and paged fragments). 
*/ -unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset) +static unsigned int qeth_count_elements(struct sk_buff *skb, + unsigned int data_offset) { unsigned int elements = qeth_get_elements_for_frags(skb); addr_t end = (addr_t)skb->data + skb_headlen(skb); @@ -3904,7 +3880,6 @@ unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset) elements += qeth_get_elements_for_range(start, end); return elements; } -EXPORT_SYMBOL_GPL(qeth_count_elements); #define QETH_HDR_CACHE_OBJ_SIZE (sizeof(struct qeth_hdr_tso) + \ MAX_TCP_HEADER) @@ -4192,10 +4167,11 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue, return 0; } -int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, - struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int offset, unsigned int hd_len, - int elements_needed) +static int qeth_do_send_packet(struct qeth_card *card, + struct qeth_qdio_out_q *queue, + struct sk_buff *skb, struct qeth_hdr *hdr, + unsigned int offset, unsigned int hd_len, + unsigned int elements_needed) { unsigned int start_index = queue->next_buf_to_fill; struct qeth_qdio_out_buffer *buffer; @@ -4275,7 +4251,6 @@ out: netif_tx_start_queue(txq); return rc; } -EXPORT_SYMBOL_GPL(qeth_do_send_packet); static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr, unsigned int payload_len, struct sk_buff *skb, @@ -4554,7 +4529,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum) case MII_BMCR: /* Basic mode control register */ rc = BMCR_FULLDPLX; if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) && - (card->info.link_type != QETH_LINK_TYPE_OSN) && (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) && (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH)) rc |= BMCR_SPEED100; @@ -5266,10 +5240,6 @@ static struct ccw_device_id qeth_ids[] = { .driver_info = QETH_CARD_TYPE_OSD}, {CCW_DEVICE_DEVTYPE(0x1731, 0x05, 0x1732, 0x05), .driver_info = QETH_CARD_TYPE_IQD}, -#ifdef CONFIG_QETH_OSN - {CCW_DEVICE_DEVTYPE(0x1731, 0x06, 0x1732, 0x06), - .driver_info = QETH_CARD_TYPE_OSN}, -#endif {CCW_DEVICE_DEVTYPE(0x1731, 0x02, 0x1732, 0x03), .driver_info = QETH_CARD_TYPE_OSM}, #ifdef CONFIG_QETH_OSX @@ -5628,14 +5598,6 @@ static void qeth_receive_skb(struct qeth_card *card, struct sk_buff *skb, bool is_cso; switch (hdr->hdr.l2.id) { - case QETH_HEADER_TYPE_OSN: - skb_push(skb, sizeof(*hdr)); - skb_copy_to_linear_data(skb, hdr, sizeof(*hdr)); - QETH_CARD_STAT_ADD(card, rx_bytes, skb->len); - QETH_CARD_STAT_INC(card, rx_packets); - - card->osn_info.data_cb(skb); - return; #if IS_ENABLED(CONFIG_QETH_L3) case QETH_HEADER_TYPE_LAYER3: qeth_l3_rebuild_skb(card, skb, hdr); @@ -5750,16 +5712,6 @@ next_packet: linear_len = sizeof(struct iphdr); headroom = ETH_HLEN; break; - case QETH_HEADER_TYPE_OSN: - skb_len = hdr->hdr.osn.pdu_length; - if (!IS_OSN(card)) { - QETH_CARD_STAT_INC(card, rx_dropped_notsupp); - goto walk_packet; - } - - linear_len = skb_len; - headroom = sizeof(struct qeth_hdr); - break; default: if (hdr->hdr.l2.id & QETH_HEADER_MASK_INVAL) QETH_CARD_STAT_INC(card, rx_frame_errors); @@ -5777,8 +5729,7 @@ next_packet: use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || (skb_len > READ_ONCE(priv->rx_copybreak) && - !atomic_read(&card->force_alloc_skb) && - !IS_OSN(card)); + !atomic_read(&card->force_alloc_skb)); if (use_rx_sg) { /* QETH_CQ_ENABLED only: */ @@ -6335,14 +6286,9 @@ void qeth_remove_discipline(struct qeth_card *card) card->discipline = NULL; } -const struct device_type qeth_generic_devtype = { 
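The qeth_core_main.c hunks above also detach reply matching from command preparation: qeth_prepare_ipa_cmd() loses its match parameter, and the sender instead stores the predicate on the buffer itself (iob->match = qeth_ipa_match_reply) for the completion path to use when pairing replies with pending requests. A sketch of that matching pattern, using invented demo_* types that show only the shape, not qeth's actual structures:

#include <linux/list.h>
#include <linux/types.h>

struct demo_cmd {
	struct list_head list;		/* linked on the pending queue */
	u32 seqno;
	/* set by the sender; decides whether a reply belongs to this cmd */
	bool (*match)(struct demo_cmd *cmd, const void *reply);
};

/* Completion path: deliver the reply to the first pending command
 * whose predicate accepts it; NULL means unsolicited or stale. */
static struct demo_cmd *demo_match_reply(struct list_head *pending,
					 const void *reply)
{
	struct demo_cmd *cmd;

	list_for_each_entry(cmd, pending, list)
		if (cmd->match && cmd->match(cmd, reply))
			return cmd;
	return NULL;
}

Keeping the predicate per command lets different request types match on different header fields without the demultiplexer having to know any of them.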
+static const struct device_type qeth_generic_devtype = { .name = "qeth_generic", }; -EXPORT_SYMBOL_GPL(qeth_generic_devtype); - -static const struct device_type qeth_osn_devtype = { - .name = "qeth_osn", -}; #define DBF_NAME_LEN 20 @@ -6425,10 +6371,6 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) case QETH_CARD_TYPE_OSM: dev = alloc_etherdev(sizeof(*priv)); break; - case QETH_CARD_TYPE_OSN: - dev = alloc_netdev(sizeof(*priv), "osn%d", NET_NAME_UNKNOWN, - ether_setup); - break; default: dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_OUT_QUEUES, 1); } @@ -6442,23 +6384,19 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) dev->ml_priv = card; dev->watchdog_timeo = QETH_TX_TIMEOUT; - dev->min_mtu = IS_OSN(card) ? 64 : 576; + dev->min_mtu = 576; /* initialized when device first goes online: */ dev->max_mtu = 0; dev->mtu = 0; SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (IS_OSN(card)) { - dev->ethtool_ops = &qeth_osn_ethtool_ops; - } else { - dev->ethtool_ops = &qeth_ethtool_ops; - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->hw_features |= NETIF_F_SG; - dev->vlan_features |= NETIF_F_SG; - if (IS_IQD(card)) - dev->features |= NETIF_F_SG; - } + dev->ethtool_ops = &qeth_ethtool_ops; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->hw_features |= NETIF_F_SG; + dev->vlan_features |= NETIF_F_SG; + if (IS_IQD(card)) + dev->features |= NETIF_F_SG; return dev; } @@ -6521,10 +6459,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) if (rc) goto err_chp_desc; - if (IS_OSN(card)) - gdev->dev.groups = qeth_osn_dev_groups; - else - gdev->dev.groups = qeth_dev_groups; + gdev->dev.groups = qeth_dev_groups; enforced_disc = qeth_enforce_discipline(card); switch (enforced_disc) { @@ -6538,8 +6473,6 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) if (rc) goto err_setup_disc; - gdev->dev.type = IS_OSN(card) ? 
&qeth_osn_devtype : - card->discipline->devtype; break; } @@ -6657,36 +6590,42 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = { .shutdown = qeth_core_shutdown, }; -struct qeth_card *qeth_get_card_by_busid(char *bus_id) -{ - struct ccwgroup_device *gdev; - struct qeth_card *card; - - gdev = get_ccwgroupdev_by_busid(&qeth_core_ccwgroup_driver, bus_id); - if (!gdev) - return NULL; - - card = dev_get_drvdata(&gdev->dev); - put_device(&gdev->dev); - return card; -} -EXPORT_SYMBOL_GPL(qeth_get_card_by_busid); - -int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct qeth_card *card = dev->ml_priv; - struct mii_ioctl_data *mii_data; int rc = 0; switch (cmd) { case SIOC_QETH_ADP_SET_SNMP_CONTROL: - rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data); + rc = qeth_snmp_command(card, data); break; case SIOC_QETH_GET_CARD_TYPE: if ((IS_OSD(card) || IS_OSM(card) || IS_OSX(card)) && !IS_VM_NIC(card)) return 1; return 0; + case SIOC_QETH_QUERY_OAT: + rc = qeth_query_oat_command(card, data); + break; + default: + if (card->discipline->do_ioctl) + rc = card->discipline->do_ioctl(dev, rq, data, cmd); + else + rc = -EOPNOTSUPP; + } + if (rc) + QETH_CARD_TEXT_(card, 2, "ioce%x", rc); + return rc; +} +EXPORT_SYMBOL_GPL(qeth_siocdevprivate); + +int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct qeth_card *card = dev->ml_priv; + struct mii_ioctl_data *mii_data; + int rc = 0; + + switch (cmd) { case SIOCGMIIPHY: mii_data = if_mii(rq); mii_data->phy_id = 0; @@ -6699,14 +6638,8 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) mii_data->val_out = qeth_mdio_read(dev, mii_data->phy_id, mii_data->reg_num); break; - case SIOC_QETH_QUERY_OAT: - rc = qeth_query_oat_command(card, rq->ifr_ifru.ifru_data); - break; default: - if (card->discipline->do_ioctl) - rc = card->discipline->do_ioctl(dev, rq, cmd); - else - rc = -EOPNOTSUPP; + return -EOPNOTSUPP; } if (rc) QETH_CARD_TEXT_(card, 2, "ioce%x", rc); diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 68c2588b9dcc..d9266f7d8187 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -232,9 +232,6 @@ static const struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELVLAN, "delvlan"}, {IPA_CMD_VNICC, "vnic_characteristics"}, {IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"}, - {IPA_CMD_SETCCID, "setccid"}, - {IPA_CMD_DELCCID, "delccid"}, - {IPA_CMD_MODCCID, "modccid"}, {IPA_CMD_SETIP, "setip"}, {IPA_CMD_QIPASSIST, "qipassist"}, {IPA_CMD_SETASSPARMS, "setassparms"}, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index e4bde7daf083..6257f00786b3 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -34,8 +34,6 @@ extern const unsigned char IPA_PDU_HEADER[]; /*****************************************************************************/ #define IPA_CMD_INITIATOR_HOST 0x00 #define IPA_CMD_INITIATOR_OSA 0x01 -#define IPA_CMD_INITIATOR_HOST_REPLY 0x80 -#define IPA_CMD_INITIATOR_OSA_REPLY 0x81 #define IPA_CMD_PRIM_VERSION_NO 0x01 struct qeth_ipa_caps { @@ -66,7 +64,6 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask) enum qeth_card_types { QETH_CARD_TYPE_OSD = 1, QETH_CARD_TYPE_IQD = 5, - QETH_CARD_TYPE_OSN = 6, QETH_CARD_TYPE_OSM = 3, QETH_CARD_TYPE_OSX = 2, }; @@ -75,12 +72,6 @@ enum qeth_card_types { #define IS_OSD(card) 
((card)->info.type == QETH_CARD_TYPE_OSD) #define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM) -#ifdef CONFIG_QETH_OSN -#define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN) -#else -#define IS_OSN(card) false -#endif - #ifdef CONFIG_QETH_OSX #define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX) #else @@ -95,7 +86,6 @@ enum qeth_link_types { QETH_LINK_TYPE_FAST_ETH = 0x01, QETH_LINK_TYPE_HSTR = 0x02, QETH_LINK_TYPE_GBIT_ETH = 0x03, - QETH_LINK_TYPE_OSN = 0x04, QETH_LINK_TYPE_10GBIT_ETH = 0x10, QETH_LINK_TYPE_25GBIT_ETH = 0x12, QETH_LINK_TYPE_LANE_ETH100 = 0x81, @@ -126,9 +116,6 @@ enum qeth_ipa_cmds { IPA_CMD_DELVLAN = 0x26, IPA_CMD_VNICC = 0x2a, IPA_CMD_SETBRIDGEPORT_OSA = 0x2b, - IPA_CMD_SETCCID = 0x41, - IPA_CMD_DELCCID = 0x42, - IPA_CMD_MODCCID = 0x43, IPA_CMD_SETIP = 0xb1, IPA_CMD_QIPASSIST = 0xb2, IPA_CMD_SETASSPARMS = 0xb3, @@ -879,8 +866,7 @@ extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); /* Helper functions */ -#define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \ - (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY)) +#define IS_IPA_REPLY(cmd) ((cmd)->hdr.initiator == IPA_CMD_INITIATOR_HOST) /*****************************************************************************/ /* END OF IP Assist related definitions */ @@ -919,10 +905,9 @@ extern const unsigned char ULP_ENABLE[]; (PDU_ENCAPSULATION(buffer) + 0x17) #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \ (PDU_ENCAPSULATION(buffer) + 0x2b) -/* Layer 2 definitions */ -#define QETH_PROT_LAYER2 0x08 -#define QETH_PROT_TCPIP 0x03 -#define QETH_PROT_OSN2 0x0a + +#define QETH_MPC_PROT_L2 0x08 +#define QETH_MPC_PROT_L3 0x03 #define QETH_ULP_ENABLE_PROT_TYPE(buffer) (buffer + 0x50) #define QETH_IPA_CMD_PROT_TYPE(buffer) (buffer + 0x19) diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 5815114da468..406be169173c 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -671,11 +671,6 @@ static const struct attribute_group qeth_dev_group = { .attrs = qeth_dev_attrs, }; -const struct attribute_group *qeth_osn_dev_groups[] = { - &qeth_dev_group, - NULL, -}; - const struct attribute_group *qeth_dev_groups[] = { &qeth_dev_group, &qeth_dev_extended_group, diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index 2c4cb300a8fc..3937986f159a 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -469,10 +469,3 @@ const struct ethtool_ops qeth_ethtool_ops = { .set_per_queue_coalesce = qeth_set_per_queue_coalesce, .get_link_ksettings = qeth_get_link_ksettings, }; - -const struct ethtool_ops qeth_osn_ethtool_ops = { - .get_strings = qeth_get_strings, - .get_ethtool_stats = qeth_get_ethtool_stats, - .get_sset_count = qeth_get_sset_count, - .get_drvinfo = qeth_get_drvinfo, -}; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2abf86c104d5..69afc0311dd1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -309,17 +309,16 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) /* fall back to alternative mechanism: */ } - if (!IS_OSN(card)) { - rc = qeth_setadpparms_change_macaddr(card); - if (!rc) - goto out; - QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", - CARD_DEVID(card), rc); - QETH_CARD_TEXT_(card, 2, "1err%04x", rc); - /* fall back once more: */ - } + rc = 
qeth_setadpparms_change_macaddr(card); + if (!rc) + goto out; + QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, "1err%04x", rc); - /* some devices don't support a custom MAC address: */ + /* Fall back once more, but some devices don't support a custom MAC + * address: + */ if (IS_OSM(card) || IS_OSX(card)) return (rc) ? rc : -EADDRNOTAVAIL; eth_hw_addr_random(card->dev); @@ -334,7 +333,7 @@ static void qeth_l2_register_dev_addr(struct qeth_card *card) if (!is_valid_ether_addr(card->dev->dev_addr)) qeth_l2_request_initial_mac(card); - if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr)) + if (!qeth_l2_send_setmac(card, card->dev->dev_addr)) card->info.dev_addr_is_registered = 1; else card->info.dev_addr_is_registered = 0; @@ -496,44 +495,6 @@ static void qeth_l2_rx_mode_work(struct work_struct *work) qeth_l2_set_promisc_mode(card); } -static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue) -{ - gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); - struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data; - addr_t end = (addr_t)(skb->data + sizeof(*hdr)); - addr_t start = (addr_t)skb->data; - unsigned int elements = 0; - unsigned int hd_len = 0; - int rc; - - if (skb->protocol == htons(ETH_P_IPV6)) - return -EPROTONOSUPPORT; - - if (qeth_get_elements_for_range(start, end) > 1) { - /* Misaligned HW header, move it to its own buffer element. */ - hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); - if (!hdr) - return -ENOMEM; - hd_len = sizeof(*hdr); - skb_copy_from_linear_data(skb, (char *)hdr, hd_len); - elements++; - } - - elements += qeth_count_elements(skb, hd_len); - if (elements > queue->max_elements) { - rc = -E2BIG; - goto out; - } - - rc = qeth_do_send_packet(card, queue, skb, hdr, hd_len, hd_len, - elements); -out: - if (rc && hd_len) - kmem_cache_free(qeth_core_header_cache, hdr); - return rc; -} - static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -548,12 +509,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, txq = qeth_iqd_translate_txq(dev, txq); queue = card->qdio.out_qs[txq]; - if (IS_OSN(card)) - rc = qeth_l2_xmit_osn(card, skb, queue); - else - rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb), - qeth_l2_fill_header); - + rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb), + qeth_l2_fill_header); if (!rc) return NETDEV_TX_OK; @@ -879,7 +836,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_select_queue = qeth_l2_select_queue, .ndo_validate_addr = qeth_l2_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, + .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_set_mac_address = qeth_l2_set_mac_address, .ndo_vlan_rx_add_vid = qeth_l2_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qeth_l2_vlan_rx_kill_vid, @@ -890,23 +848,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_bridge_setlink = qeth_l2_bridge_setlink, }; -static const struct net_device_ops qeth_osn_netdev_ops = { - .ndo_open = qeth_open, - .ndo_stop = qeth_stop, - .ndo_get_stats64 = qeth_get_stats64, - .ndo_start_xmit = qeth_l2_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_tx_timeout = qeth_tx_timeout, -}; - static int qeth_l2_setup_netdev(struct qeth_card *card) { - if (IS_OSN(card)) { - card->dev->netdev_ops = &qeth_osn_netdev_ops; - card->dev->flags |= IFF_NOARP; - goto add_napi; - } 
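/*
 * (Editor's note, not part of the patch.) The qeth hunks above follow the
 * tree-wide ndo_do_ioctl split: the core now routes the device-private
 * range SIOCDEVPRIVATE..SIOCDEVPRIVATE+15 to ndo_siocdevprivate() with the
 * user pointer broken out as "data", while hardware/MII commands such as
 * SIOCGMIIPHY and SIOCGMIIREG go through ndo_eth_ioctl(). A minimal sketch
 * of the resulting driver pattern; the foo_* names are hypothetical and
 * only illustrate the calling convention:
 *
 *	static int foo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
 *				      void __user *data, int cmd)
 *	{
 *		struct foo_priv_req req;
 *
 *		if (cmd != SIOCDEVPRIVATE)
 *			return -EOPNOTSUPP;
 *		if (copy_from_user(&req, data, sizeof(req)))
 *			return -EFAULT;
 *		return foo_handle_priv_req(dev, &req);
 *	}
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		.ndo_eth_ioctl		= foo_mii_ioctl,
 *		.ndo_siocdevprivate	= foo_siocdevprivate,
 *	};
 */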
- card->dev->needed_headroom = sizeof(struct qeth_hdr); card->dev->netdev_ops = &qeth_l2_netdev_ops; card->dev->priv_flags |= IFF_UNICAST_FLT; @@ -952,7 +895,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); } -add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); return register_netdev(card->dev); } @@ -1044,84 +986,6 @@ static void qeth_l2_enable_brport_features(struct qeth_card *card) } } -#ifdef CONFIG_QETH_OSN -static void qeth_osn_assist_cb(struct qeth_card *card, - struct qeth_cmd_buffer *iob, - unsigned int data_length) -{ - qeth_notify_cmd(iob, 0); - qeth_put_cmd(iob); -} - -int qeth_osn_assist(struct net_device *dev, void *data, int data_len) -{ - struct qeth_cmd_buffer *iob; - struct qeth_card *card; - - if (data_len < 0) - return -EINVAL; - if (!dev) - return -ENODEV; - card = dev->ml_priv; - if (!card) - return -ENODEV; - QETH_CARD_TEXT(card, 2, "osnsdmc"); - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; - - iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_len, 1, - QETH_IPA_TIMEOUT); - if (!iob) - return -ENOMEM; - - qeth_prepare_ipa_cmd(card, iob, (u16) data_len, NULL); - - memcpy(__ipa_cmd(iob), data, data_len); - iob->callback = qeth_osn_assist_cb; - return qeth_send_ipa_cmd(card, iob, NULL, NULL); -} -EXPORT_SYMBOL(qeth_osn_assist); - -int qeth_osn_register(unsigned char *read_dev_no, struct net_device **dev, - int (*assist_cb)(struct net_device *, void *), - int (*data_cb)(struct sk_buff *)) -{ - struct qeth_card *card; - char bus_id[16]; - u16 devno; - - memcpy(&devno, read_dev_no, 2); - sprintf(bus_id, "0.0.%04x", devno); - card = qeth_get_card_by_busid(bus_id); - if (!card || !IS_OSN(card)) - return -ENODEV; - *dev = card->dev; - - QETH_CARD_TEXT(card, 2, "osnreg"); - if ((assist_cb == NULL) || (data_cb == NULL)) - return -EINVAL; - card->osn_info.assist_cb = assist_cb; - card->osn_info.data_cb = data_cb; - return 0; -} -EXPORT_SYMBOL(qeth_osn_register); - -void qeth_osn_deregister(struct net_device *dev) -{ - struct qeth_card *card; - - if (!dev) - return; - card = dev->ml_priv; - if (!card) - return; - QETH_CARD_TEXT(card, 2, "osndereg"); - card->osn_info.assist_cb = NULL; - card->osn_info.data_cb = NULL; -} -EXPORT_SYMBOL(qeth_osn_deregister); -#endif - /* SETBRIDGEPORT support, async notifications */ enum qeth_an_event_type {anev_reg_unreg, anev_abort, anev_reset}; @@ -2190,16 +2054,15 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; - if (IS_OSN(card)) - dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n"); - qeth_l2_vnicc_set_defaults(card); mutex_init(&card->sbp_lock); - if (gdev->dev.type == &qeth_generic_devtype) { + if (gdev->dev.type) { rc = device_add_groups(&gdev->dev, qeth_l2_attr_groups); if (rc) return rc; + } else { + gdev->dev.type = &qeth_l2_devtype; } INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work); @@ -2210,8 +2073,9 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); - if (gdev->dev.type == &qeth_generic_devtype) + if (gdev->dev.type != &qeth_l2_devtype) device_remove_groups(&gdev->dev, qeth_l2_attr_groups); + qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); @@ -2331,7 +2195,6 @@ static int qeth_l2_control_event(struct qeth_card *card, } const struct qeth_discipline qeth_l2_discipline = { - .devtype = &qeth_l2_devtype, 
.setup = qeth_l2_probe_device, .remove = qeth_l2_remove_device, .set_online = qeth_l2_set_online, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f0d6f205c53c..3a523e700a5a 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1512,7 +1512,7 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card) return rc; } -static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct qeth_card *card = dev->ml_priv; struct qeth_arp_cache_entry arp_entry; @@ -1532,13 +1532,13 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) rc = -EPERM; break; } - rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data); + rc = qeth_l3_arp_query(card, data); break; case SIOC_QETH_ARP_ADD_ENTRY: case SIOC_QETH_ARP_REMOVE_ENTRY: if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry))) + if (copy_from_user(&arp_entry, data, sizeof(arp_entry))) return -EFAULT; arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ? @@ -1841,7 +1841,8 @@ static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_select_queue = qeth_l3_iqd_select_queue, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, + .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_fix_features = qeth_fix_features, .ndo_set_features = qeth_set_features, .ndo_tx_timeout = qeth_tx_timeout, @@ -1856,7 +1857,8 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_select_queue = qeth_l3_osa_select_queue, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, + .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_fix_features = qeth_fix_features, .ndo_set_features = qeth_set_features, .ndo_tx_timeout = qeth_tx_timeout, @@ -1940,12 +1942,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) if (!card->cmd_wq) return -ENOMEM; - if (gdev->dev.type == &qeth_generic_devtype) { + if (gdev->dev.type) { rc = device_add_groups(&gdev->dev, qeth_l3_attr_groups); if (rc) { destroy_workqueue(card->cmd_wq); return rc; } + } else { + gdev->dev.type = &qeth_l3_devtype; } INIT_WORK(&card->rx_mode_work, qeth_l3_rx_mode_work); @@ -1956,7 +1960,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) { struct qeth_card *card = dev_get_drvdata(&cgdev->dev); - if (cgdev->dev.type == &qeth_generic_devtype) + if (cgdev->dev.type != &qeth_l3_devtype) device_remove_groups(&cgdev->dev, qeth_l3_attr_groups); qeth_set_allowed_threads(card, 0, 1); @@ -2065,7 +2069,6 @@ static int qeth_l3_control_event(struct qeth_card *card, } const struct qeth_discipline qeth_l3_discipline = { - .devtype = &qeth_l3_devtype, .setup = qeth_l3_probe_device, .remove = qeth_l3_remove_device, .set_online = qeth_l3_set_online, diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index dcbba9621b21..5d24c1b6663b 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -524,7 +524,7 @@ static const struct net_device_ops cvm_oct_npi_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = 
cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -540,7 +540,7 @@ static const struct net_device_ops cvm_oct_xaui_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -556,7 +556,7 @@ static const struct net_device_ops cvm_oct_sgmii_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -570,7 +570,7 @@ static const struct net_device_ops cvm_oct_spi_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -586,7 +586,7 @@ static const struct net_device_ops cvm_oct_rgmii_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -599,7 +599,7 @@ static const struct net_device_ops cvm_oct_pow_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit_pow, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/staging/rtl8188eu/include/osdep_intf.h b/drivers/staging/rtl8188eu/include/osdep_intf.h index 5012b9176526..34decb03e92f 100644 --- a/drivers/staging/rtl8188eu/include/osdep_intf.h +++ b/drivers/staging/rtl8188eu/include/osdep_intf.h @@ -22,6 +22,8 @@ void rtw_stop_drv_threads(struct adapter *padapter); void rtw_cancel_all_timer(struct adapter *padapter); int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int rtw_android_priv_cmd(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); struct net_device *rtw_init_netdev(void); u16 rtw_recv_select_queue(struct sk_buff *skb); diff --git a/drivers/staging/rtl8188eu/include/rtw_android.h b/drivers/staging/rtl8188eu/include/rtw_android.h index 2c26993b8205..3018fc1e8de8 100644 --- a/drivers/staging/rtl8188eu/include/rtw_android.h +++ b/drivers/staging/rtl8188eu/include/rtw_android.h @@ -45,6 +45,7 @@ enum ANDROID_WIFI_CMD { ANDROID_WIFI_CMD_MAX }; -int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd); +int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, + void __user *data, int cmd); #endif /* __RTW_ANDROID_H__ */ diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index b958a8d882b0..193a3dde462c 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ 
b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2769,9 +2769,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ret = rtw_hostapd_ioctl(dev, &wrq->u.data); break; #endif /* CONFIG_88EU_AP_MODE */ - case (SIOCDEVPRIVATE + 1): - ret = rtw_android_priv_cmd(dev, rq, cmd); - break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c index 423c382e3d20..596e03e7b286 100644 --- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c +++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c @@ -288,6 +288,7 @@ static const struct net_device_ops rtw_netdev_ops = { .ndo_set_mac_address = rtw_net_set_mac_address, .ndo_get_stats = rtw_net_get_stats, .ndo_do_ioctl = rtw_ioctl, + .ndo_siocdevprivate = rtw_android_priv_cmd, }; static const struct device_type wlan_type = { diff --git a/drivers/staging/rtl8188eu/os_dep/rtw_android.c b/drivers/staging/rtl8188eu/os_dep/rtw_android.c index 3c5446999686..a13df3880378 100644 --- a/drivers/staging/rtl8188eu/os_dep/rtw_android.c +++ b/drivers/staging/rtl8188eu/os_dep/rtw_android.c @@ -5,6 +5,7 @@ * ******************************************************************************/ +#include <linux/compat.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -116,7 +117,8 @@ static int android_get_p2p_addr(struct net_device *net, char *command, return ETH_ALEN; } -int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd) +int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, + void __user *data, int cmd) { int ret = 0; char *command; @@ -124,9 +126,15 @@ int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd) int bytes_written = 0; struct android_wifi_priv_cmd priv_cmd; - if (!ifr->ifr_data) + if (cmd != SIOCDEVPRIVATE) + return -EOPNOTSUPP; + + if (in_compat_syscall()) /* to be implemented */ + return -EOPNOTSUPP; + + if (!data) return -EINVAL; - if (copy_from_user(&priv_cmd, ifr->ifr_data, sizeof(priv_cmd))) + if (copy_from_user(&priv_cmd, data, sizeof(priv_cmd))) return -EFAULT; if (priv_cmd.total_len < 1) return -EINVAL; diff --git a/drivers/staging/rtl8723bs/include/osdep_intf.h b/drivers/staging/rtl8723bs/include/osdep_intf.h index 111e0179712a..5badd441c14b 100644 --- a/drivers/staging/rtl8723bs/include/osdep_intf.h +++ b/drivers/staging/rtl8723bs/include/osdep_intf.h @@ -48,6 +48,8 @@ void rtw_stop_drv_threads(struct adapter *padapter); void rtw_cancel_all_timer(struct adapter *padapter); int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname); struct net_device *rtw_init_netdev(struct adapter *padapter); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index f95000df8942..aa7bd76bb5f1 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -4485,6 +4485,21 @@ exit: return err; } +int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd) +{ + struct iwreq *wrq = (struct iwreq *)rq; + + /* little hope of fixing this, better remove the whole function */ + if (in_compat_syscall()) + return -EOPNOTSUPP; + + if (cmd != SIOCDEVPRIVATE) + return -EOPNOTSUPP; + + return rtw_ioctl_wext_private(dev, &wrq->u); +} + int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct 
iwreq *wrq = (struct iwreq *)rq; @@ -4497,9 +4512,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case RTL_IOCTL_HOSTAPD: ret = rtw_hostapd_ioctl(dev, &wrq->u.data); break; - case SIOCDEVPRIVATE: - ret = rtw_ioctl_wext_private(dev, &wrq->u); - break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c index 648456b992bb..9e38b53d3b4a 100644 --- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c +++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c @@ -459,6 +459,7 @@ static const struct net_device_ops rtw_netdev_ops = { .ndo_set_mac_address = rtw_net_set_mac_address, .ndo_get_stats = rtw_net_get_stats, .ndo_do_ioctl = rtw_ioctl, + .ndo_siocdevprivate = rtw_siocdevprivate, }; int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname) diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index 6f470e7ba647..1c62130a5eee 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -98,8 +98,8 @@ static int p80211knetdev_stop(struct net_device *netdev); static netdev_tx_t p80211knetdev_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev); static void p80211knetdev_set_multicast_list(struct net_device *dev); -static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd); +static int p80211knetdev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr); static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc); @@ -461,56 +461,8 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev) wlandev->set_multicast_list(wlandev, dev); } -#ifdef SIOCETHTOOL - -static int p80211netdev_ethtool(struct wlandevice *wlandev, - void __user *useraddr) -{ - u32 ethcmd; - struct ethtool_drvinfo info; - struct ethtool_value edata; - - memset(&info, 0, sizeof(info)); - memset(&edata, 0, sizeof(edata)); - - if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) - return -EFAULT; - - switch (ethcmd) { - case ETHTOOL_GDRVINFO: - info.cmd = ethcmd; - snprintf(info.driver, sizeof(info.driver), "p80211_%s", - wlandev->nsdname); - snprintf(info.version, sizeof(info.version), "%s", - WLAN_RELEASE); - - if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; -#ifdef ETHTOOL_GLINK - case ETHTOOL_GLINK: - edata.cmd = ethcmd; - - if (wlandev->linkstatus && - (wlandev->macmode != WLAN_MACMODE_NONE)) { - edata.data = 1; - } else { - edata.data = 0; - } - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -#endif - } - - return -EOPNOTSUPP; -} - -#endif - /*---------------------------------------------------------------- - * p80211knetdev_do_ioctl + * p80211knetdev_siocdevprivate * * Handle an ioctl call on one of our devices. Everything Linux * ioctl specific is done here. Then we pass the contents of the @@ -537,8 +489,9 @@ static int p80211netdev_ethtool(struct wlandevice *wlandev, * locks.
*---------------------------------------------------------------- */ -static int p80211knetdev_do_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int p80211knetdev_siocdevprivate(struct net_device *dev, + struct ifreq *ifr, + void __user *data, int cmd) { int result = 0; struct p80211ioctl_req *req = (struct p80211ioctl_req *)ifr; @@ -547,13 +500,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev, netdev_dbg(dev, "rx'd ioctl, cmd=%d, len=%d\n", cmd, req->len); -#ifdef SIOCETHTOOL - if (cmd == SIOCETHTOOL) { - result = - p80211netdev_ethtool(wlandev, (void __user *)ifr->ifr_data); - goto bail; - } -#endif + if (in_compat_syscall()) + return -EOPNOTSUPP; /* Test the magic, assume ifr is good if it's there */ if (req->magic != P80211_IOCTL_MAGIC) { @@ -569,7 +517,7 @@ static int p80211knetdev_do_ioctl(struct net_device *dev, goto bail; } - msgbuf = memdup_user(req->data, req->len); + msgbuf = memdup_user(data, req->len); if (IS_ERR(msgbuf)) { result = PTR_ERR(msgbuf); goto bail; @@ -578,10 +526,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev, result = p80211req_dorequest(wlandev, msgbuf); if (result == 0) { - if (copy_to_user - (req->data, msgbuf, req->len)) { + if (copy_to_user(data, msgbuf, req->len)) result = -EFAULT; - } } kfree(msgbuf); @@ -682,7 +628,7 @@ static const struct net_device_ops p80211_netdev_ops = { .ndo_stop = p80211knetdev_stop, .ndo_start_xmit = p80211knetdev_hard_start_xmit, .ndo_set_rx_mode = p80211knetdev_set_multicast_list, - .ndo_do_ioctl = p80211knetdev_do_ioctl, + .ndo_siocdevprivate = p80211knetdev_siocdevprivate, .ndo_set_mac_address = p80211knetdev_set_mac_address, .ndo_tx_timeout = p80211knetdev_tx_timeout, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 5bb928b7873e..3e3b8873fa29 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1524,11 +1524,11 @@ static int hdlcdev_close(struct net_device *dev) * * Return: 0 if success, otherwise error code */ -static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int hdlcdev_ioctl(struct net_device *dev, struct if_settings *ifs) { const size_t size = sizeof(sync_serial_settings); sync_serial_settings new_line; - sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; + sync_serial_settings __user *line = ifs->ifs_ifsu.sync; struct slgt_info *info = dev_to_port(dev); unsigned int flags; @@ -1538,17 +1538,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (info->port.count) return -EBUSY; - if (cmd != SIOCWANDEV) - return hdlc_ioctl(dev, ifr, cmd); - memset(&new_line, 0, sizeof(new_line)); - switch(ifr->ifr_settings.type) { + switch (ifs->type) { case IF_GET_IFACE: /* return current sync_serial_settings */ - ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; - if (ifr->ifr_settings.size < size) { - ifr->ifr_settings.size = size; /* data size wanted */ + ifs->type = IF_IFACE_SYNC_SERIAL; + if (ifs->size < size) { + ifs->size = size; /* data size wanted */ return -ENOBUFS; } @@ -1615,7 +1612,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return 0; default: - return hdlc_ioctl(dev, ifr, cmd); + return hdlc_ioctl(dev, ifs); } } @@ -1688,7 +1685,7 @@ static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, .ndo_start_xmit = hdlc_start_xmit, - .ndo_do_ioctl = hdlcdev_ioctl, + .ndo_siocwandev = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, 
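/*
 * (Editor's note, not part of the patch.) ndo_siocwandev is invoked for
 * SIOCWANDEV only, and the core hands the driver &ifr->ifr_settings
 * directly, which is why the old "cmd != SIOCWANDEV -> hdlc_ioctl()"
 * guard and all ifr->ifr_settings dereferences drop out of
 * hdlcdev_ioctl() above. The converted handler shape, condensed into a
 * sketch (foo_get_params is hypothetical; the IF_GET_IFACE flow mirrors
 * the synclink_gt hunk):
 *
 *	static int foo_wandev_ioctl(struct net_device *dev,
 *				    struct if_settings *ifs)
 *	{
 *		sync_serial_settings cfg;
 *
 *		switch (ifs->type) {
 *		case IF_GET_IFACE:
 *			ifs->type = IF_IFACE_SYNC_SERIAL;
 *			if (ifs->size < sizeof(cfg)) {
 *				ifs->size = sizeof(cfg);
 *				return -ENOBUFS;
 *			}
 *			foo_get_params(dev, &cfg);
 *			if (copy_to_user(ifs->ifs_ifsu.sync, &cfg, sizeof(cfg)))
 *				return -EFAULT;
 *			return 0;
 *		default:
 *			return hdlc_ioctl(dev, ifs);
 *		}
 *	}
 */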
}; diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h index 30f7b18a36f9..d46c0201cc34 100644 --- a/include/asm-generic/compat.h +++ b/include/asm-generic/compat.h @@ -20,7 +20,18 @@ typedef u16 compat_ushort_t; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u32 compat_uptr_t; +typedef u32 compat_caddr_t; typedef u32 compat_aio_context_t; +typedef u32 compat_old_sigset_t; + +#ifndef __compat_uid32_t +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +#endif + +#ifndef compat_mode_t +typedef u32 compat_mode_t; +#endif #ifdef CONFIG_COMPAT_FOR_U64_ALIGNMENT typedef s64 __attribute__((aligned(4))) compat_s64; @@ -30,4 +41,10 @@ typedef s64 compat_s64; typedef u64 compat_u64; #endif +#ifndef _COMPAT_NSIG +typedef u32 compat_sigset_word; +#define _COMPAT_NSIG _NSIG +#define _COMPAT_NSIG_BPW 32 +#endif + #endif diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 26bf15e6cd35..5e62e2383b7f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,7 @@ #include <asm/types.h> #include <linux/bits.h> +#include <linux/typecheck.h> #include <uapi/linux/kernel.h> @@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, __clear_bit(nr, addr); } +/** + * __ptr_set_bit - Set bit in a pointer's value + * @nr: the bit to set + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_set_bit(bit, &p); + */ +#define __ptr_set_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __set_bit(nr, (unsigned long *)(addr)); \ + }) + +/** + * __ptr_clear_bit - Clear bit in a pointer's value + * @nr: the bit to clear + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_clear_bit(bit, &p); + */ +#define __ptr_clear_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __clear_bit(nr, (unsigned long *)(addr)); \ + }) + +/** + * __ptr_test_bit - Test bit in a pointer's value + * @nr: the bit to test + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * if (__ptr_test_bit(bit, &p)) { + * ... + * } else { + * ... + * } + */ +#define __ptr_test_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + test_bit(nr, (unsigned long *)(addr)); \ + }) + #ifdef __KERNEL__ #ifndef set_mask_bits diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8b77d08d4b47..a74cd1c3bd87 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -27,19 +27,6 @@ struct task_struct; extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) -#define BPF_CGROUP_STORAGE_NEST_MAX 8 - -struct bpf_cgroup_storage_info { - struct task_struct *task; - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; -}; - -/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks - * to use bpf cgroup storage simultaneously. 
- */ -DECLARE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); - #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type( return BPF_CGROUP_STORAGE_SHARED; } -static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) -{ - enum bpf_cgroup_storage_type stype; - int i, err = 0; - - preempt_disable(); - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) - continue; - - this_cpu_write(bpf_cgroup_storage_info[i].task, current); - for_each_cgroup_storage_type(stype) - this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], - storage[stype]); - goto out; - } - err = -EBUSY; - WARN_ON_ONCE(1); - -out: - preempt_enable(); - return err; -} - -static inline void bpf_cgroup_storage_unset(void) -{ - int i; - - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) - continue; - - this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); - return; - } -} - struct bpf_cgroup_storage * cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, void *key, bool locked); @@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline int bpf_cgroup_storage_set( - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } -static inline void bpf_cgroup_storage_unset(void) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e8e2b0393ca9..c8cc09013210 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -168,6 +168,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; @@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map) return map->spin_lock_off >= 0; } -static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +static inline bool map_value_has_timer(const struct bpf_map *map) { - if (likely(!map_value_has_spin_lock(map))) - return; - *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = - (struct bpf_spin_lock){}; + return map->timer_off >= 0; } -/* copy everything but bpf_spin_lock */ +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + if (unlikely(map_value_has_spin_lock(map))) + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; + if (unlikely(map_value_has_timer(map))) + *(struct bpf_timer *)(dst + map->timer_off) = + (struct bpf_timer){}; +} + +/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. 
*/ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { + u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + if (unlikely(map_value_has_spin_lock(map))) { - u32 off = map->spin_lock_off; + s_off = map->spin_lock_off; + s_sz = sizeof(struct bpf_spin_lock); + } + if (unlikely(map_value_has_timer(map))) { + t_off = map->timer_off; + t_sz = sizeof(struct bpf_timer); + } - memcpy(dst, src, off); - memcpy(dst + off + sizeof(struct bpf_spin_lock), - src + off + sizeof(struct bpf_spin_lock), - map->value_size - off - sizeof(struct bpf_spin_lock)); + if (unlikely(s_sz || t_sz)) { + if (s_off < t_off || !s_sz) { + swap(s_off, t_off); + swap(s_sz, t_sz); + } + memcpy(dst, src, t_off); + memcpy(dst + t_off + t_sz, + src + t_off + t_sz, + s_off - t_off - t_sz); + memcpy(dst + s_off + s_sz, + src + s_off + s_sz, + map->value_size - s_off - s_sz); } else { memcpy(dst, src, map->value_size); } } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); +void bpf_timer_cancel_and_free(void *timer); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -314,6 +338,7 @@ enum bpf_arg_type { ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ + ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, }; @@ -554,6 +579,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Store IP address of the caller on the trampoline stack, + * so it's available for trampoline's programs. + */ +#define BPF_TRAMP_F_IP_ARG BIT(3) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ @@ -1112,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *include_prog, struct bpf_prog_array **new_array); +struct bpf_run_ctx {}; + +struct bpf_cg_run_ctx { + struct bpf_run_ctx run_ctx; + struct bpf_prog_array_item *prog_item; +}; + /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0) -/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, - * if bpf_cgroup_storage_set() failed, the rest of programs - * will not execute. This should be a really rare scenario - * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of - * preemptions all between bpf_cgroup_storage_set() and - * bpf_cgroup_storage_unset() on the same cpu.
- */ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ + struct bpf_run_ctx *old_run_ctx; \ + struct bpf_cg_run_ctx run_ctx; \ u32 _ret = 1; \ u32 func_ret; \ migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ + run_ctx.prog_item = _item; \ func_ret = func(_prog, ctx); \ _ret &= (func_ret & 1); \ - *(ret_flags) |= (func_ret >> 1); \ - bpf_cgroup_storage_unset(); \ + *(ret_flags) |= (func_ret >> 1); \ _item++; \ } \ + bpf_reset_run_ctx(old_run_ctx); \ rcu_read_unlock(); \ migrate_enable(); \ _ret; \ @@ -1154,6 +1186,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ + struct bpf_run_ctx *old_run_ctx; \ + struct bpf_cg_run_ctx run_ctx; \ u32 _ret = 1; \ migrate_disable(); \ rcu_read_lock(); \ @@ -1161,17 +1195,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, if (unlikely(check_non_null && !_array))\ goto _out; \ _item = &_array->items[0]; \ - while ((_prog = READ_ONCE(_item->prog))) { \ - if (!set_cg_storage) { \ - _ret &= func(_prog, ctx); \ - } else { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ - _ret &= func(_prog, ctx); \ - bpf_cgroup_storage_unset(); \ - } \ + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\ + while ((_prog = READ_ONCE(_item->prog))) { \ + run_ctx.prog_item = _item; \ + _ret &= func(_prog, ctx); \ _item++; \ } \ + bpf_reset_run_ctx(old_run_ctx); \ _out: \ rcu_read_unlock(); \ migrate_enable(); \ @@ -1254,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) +{ + struct bpf_run_ctx *old_ctx; + + old_ctx = current->bpf_ctx; + current->bpf_ctx = new_ctx; + return old_ctx; +} + +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) +{ + current->bpf_ctx = old_ctx; +} + extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -1398,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, struct seq_file *seq); typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +typedef const struct bpf_func_proto * +(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id, + const struct bpf_prog *prog); enum bpf_iter_feature { BPF_ITER_RESCHED = BIT(0), @@ -1410,6 +1457,7 @@ struct bpf_iter_reg { bpf_iter_detach_target_t detach_target; bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; + bpf_iter_get_func_proto_t get_func_proto; u32 ctx_arg_info_size; u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; @@ -1432,6 +1480,8 @@ struct bpf_iter__bpf_map_elem { int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); +const struct bpf_func_proto * +bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); int 
bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); @@ -1509,12 +1559,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -bool dev_map_can_have_prog(struct bpf_map *map); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); -bool cpu_map_prog_allowed(struct bpf_map *map); +int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) @@ -1711,6 +1761,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, return 0; } +static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + static inline bool cpu_map_prog_allowed(struct bpf_map *map) { return false; @@ -1852,6 +1908,12 @@ void bpf_map_offload_map_free(struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + +int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) @@ -1884,24 +1946,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, { return -ENOTSUPP; } -#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ - -#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); -int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); -int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); -void sock_map_unhash(struct sock *sk); -void sock_map_close(struct sock *sk, long timeout); - -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, @@ -1921,7 +1965,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } +#endif /* CONFIG_BPF_SYSCALL */ +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); +#else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} + +#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@ -1998,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto 
bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_sk_getsockopt_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 828d08afeee0..5424124dbe36 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -53,7 +53,14 @@ struct bpf_reg_state { /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ - struct bpf_map *map_ptr; + struct { + struct bpf_map *map_ptr; + /* To distinguish map lookups from outer map + * the map_uid is non-zero for registers + * pointing to inner maps. + */ + u32 map_uid; + }; /* for PTR_TO_BTF_ID */ struct { @@ -201,12 +208,19 @@ struct bpf_func_state { * zero == main subprog */ u32 subprogno; + /* Every bpf_timer_start will increment async_entry_cnt. + * It's used to distinguish: + * void foo(void) { for(;;); } + * void foo(void) { bpf_timer_set_callback(,foo); } + */ + u32 async_entry_cnt; + bool in_callback_fn; + bool in_async_callback_fn; /* The following fields should be last. See copy_func_state() */ int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; - bool in_callback_fn; struct bpf_stack_state *stack; }; @@ -392,6 +406,7 @@ struct bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_async_cb; }; /* single container for all structs diff --git a/include/linux/btf.h b/include/linux/btf.h index 94a0c976c90f..214fde93214b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); +int btf_find_timer(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index ae7a3411167c..9de6e9053e34 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -37,7 +37,7 @@ * quanta, from when the bit is sent on the TX pin to when it is * received on the RX pin of the transmitter. Possible options: * - * O: automatic mode. The controller dynamically measure @tdcv + * 0: automatic mode. The controller dynamically measures @tdcv * for each transmitted CAN FD frame. * * Other values: manual mode. Use the fixed provided value. @@ -45,7 +45,7 @@ * @tdco: Transmitter Delay Compensation Offset. Offset value, in time * quanta, defining the distance between the start of the bit * reception on the RX pin of the transceiver and the SSP - * position such as SSP = @tdcv + @tdco. + * position such that SSP = @tdcv + @tdco. * * If @tdco is zero, then TDC is disabled and both @tdcv and * @tdcf should be ignored. 
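/*
 * (Editor's note, not part of the patch.) Worked example for the corrected
 * @tdcv/@tdco wording in the can/bittiming.h hunk above, with illustrative
 * numbers only: if the measured transmitter delay is @tdcv = 10 time
 * quanta and the configured offset is @tdco = 5 time quanta, the secondary
 * sample point sits at SSP = @tdcv + @tdco = 15 time quanta after the
 * start of the transmitted bit, i.e. 5 time quanta into the bit as seen on
 * the RX pin. Per the same comment, @tdco = 0 disables TDC altogether and
 * @tdcv/@tdcf are then ignored.
 */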
diff --git a/include/linux/can/platform/flexcan.h b/include/linux/can/platform/flexcan.h new file mode 100644 index 000000000000..1b536fb999de --- /dev/null +++ b/include/linux/can/platform/flexcan.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Angelo Dureghello <angelo@kernel-space.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CAN_PLATFORM_FLEXCAN_H +#define _CAN_PLATFORM_FLEXCAN_H + +struct flexcan_platform_data { + u32 clock_frequency; + u8 clk_src; +}; + +#endif /* _CAN_PLATFORM_FLEXCAN_H */ diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 40882df7105e..c11477620403 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -20,6 +20,7 @@ struct can_rx_offload { bool drop); struct sk_buff_head skb_queue; + struct sk_buff_head skb_irq_queue; u32 skb_queue_len_max; unsigned int mb_first; @@ -48,14 +49,11 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); +void can_rx_offload_irq_finish(struct can_rx_offload *offload); +void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -static inline void can_rx_offload_schedule(struct can_rx_offload *offload) -{ - napi_schedule(&offload->napi); -} - static inline void can_rx_offload_disable(struct can_rx_offload *offload) { napi_disable(&offload->napi); diff --git a/include/linux/compat.h b/include/linux/compat.h index c270124e4402..8e0598c7d1d1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -20,11 +20,8 @@ #include <linux/unistd.h> #include <asm/compat.h> - -#ifdef CONFIG_COMPAT #include <asm/siginfo.h> #include <asm/signal.h> -#endif #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER /* @@ -95,8 +92,6 @@ struct compat_iovec { compat_size_t iov_len; }; -#ifdef CONFIG_COMPAT - #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif @@ -131,9 +126,11 @@ struct compat_tms { #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) +#ifndef compat_sigset_t typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; +#endif int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize); @@ -384,6 +381,7 @@ struct compat_keyctl_kdf_params { __u32 __spare[8]; }; +struct compat_stat; struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; @@ -428,7 +426,7 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsigned int size) { /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ -#ifdef __BIG_ENDIAN +#if defined(__BIG_ENDIAN) && defined(CONFIG_64BIT) compat_sigset_t v; switch (_NSIG_WORDS) { case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; @@ -929,17 +927,6 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ - -/* - * For most but not all 
architectures, "am I in a compat syscall?" and - * "am I a compat task?" are the same question. For architectures on which - * they aren't the same question, arch code can override in_compat_syscall. - */ - -#ifndef in_compat_syscall -static inline bool in_compat_syscall(void) { return is_compat_task(); } -#endif - /** * ns_to_old_timeval32 - Compat version of ns_to_timeval * @nsec: the nanoseconds value to be converted @@ -969,6 +956,17 @@ int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf); +#ifdef CONFIG_COMPAT + +/* + * For most but not all architectures, "am I in a compat syscall?" and + * "am I a compat task?" are the same question. For architectures on which + * they aren't the same question, arch code can override in_compat_syscall. + */ +#ifndef in_compat_syscall +static inline bool in_compat_syscall(void) { return is_compat_task(); } +#endif + #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 1587961f1a7b..c7fa4a3498fe 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -11,60 +11,48 @@ struct dsa_switch; struct sk_buff; struct net_device; -struct packet_type; -struct dsa_8021q_context; -struct dsa_8021q_crosschip_link { +struct dsa_tag_8021q_vlan { struct list_head list; int port; - struct dsa_8021q_context *other_ctx; - int other_port; + u16 vid; refcount_t refcount; }; -struct dsa_8021q_ops { - int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); - int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid); -}; - struct dsa_8021q_context { - const struct dsa_8021q_ops *ops; struct dsa_switch *ds; - struct list_head crosschip_links; + struct list_head vlans; /* EtherType of RX VID, used for filtering on master interface */ __be16 proto; }; -#define DSA_8021Q_N_SUBVLAN 8 - -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled); +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port); - -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port); +void dsa_tag_8021q_unregister(struct dsa_switch *ds); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *subvlan); +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); + +int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num); + +void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num); + +u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num); u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); -u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan); - int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -u16 dsa_8021q_rx_subvlan(u16 vid); - bool vid_is_dsa_8021q_rxvlan(u16 vid); bool vid_is_dsa_8021q_txvlan(u16 vid); diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index b6089b88314c..0eadc7ac44ec 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -59,7 
+59,6 @@ struct sja1105_skb_cb { ((struct sja1105_skb_cb *)((skb)->cb)) struct sja1105_port { - u16 subvlan_map[DSA_8021Q_N_SUBVLAN]; struct kthread_worker *xmit_worker; struct kthread_work xmit_work; struct sk_buff_head xmit_queue; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 232daaec56e4..4711b96dae0c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -17,8 +17,6 @@ #include <linux/compat.h> #include <uapi/linux/ethtool.h> -#ifdef CONFIG_COMPAT - struct compat_ethtool_rx_flow_spec { u32 flow_type; union ethtool_flow_union h_u; @@ -38,8 +36,6 @@ struct compat_ethtool_rxnfc { u32 rule_locs[]; }; -#endif /* CONFIG_COMPAT */ - #include <linux/rculist.h> /** diff --git a/include/linux/filter.h b/include/linux/filter.h index 83b896044e79..ff698c9d1c94 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -574,7 +574,8 @@ struct bpf_prog { kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? */ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ - call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_func_ip:1; /* Do we call get_func_ip() */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 63b56aba925a..30ece3ae6df7 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev); void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev); +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id); extern struct bus_type fsl_mc_bus_type; diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index bc738504ab4a..c285968e437a 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -8,34 +8,11 @@ /* All generic netlink requests are serialized by a global lock. */ extern void genl_lock(void); extern void genl_unlock(void); -#ifdef CONFIG_LOCKDEP -extern bool lockdep_genl_is_held(void); -#endif /* for synchronisation between af_netlink and genetlink */ extern atomic_t genl_sk_destructing_cnt; extern wait_queue_head_t genl_sk_destructing_waitq; -/** - * rcu_dereference_genl - rcu_dereference with debug checking - * @p: The pointer to read, prior to dereferencing - * - * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() - * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference() - */ -#define rcu_dereference_genl(p) \ - rcu_dereference_check(p, lockdep_genl_is_held()) - -/** - * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex - * @p: The pointer to read, prior to dereferencing - * - * Return the value of the specified RCU-protected pointer, but omit - * the READ_ONCE(), because caller holds genl mutex. 
- */ -#define genl_dereference(p) \ - rcu_dereference_protected(p, lockdep_genl_is_held()) - #define MODULE_ALIAS_GENL_FAMILY(family)\ MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index cacc4dd27794..630a388035f1 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -22,7 +22,7 @@ struct hdlc_proto { void (*start)(struct net_device *dev); /* if open & DCD */ void (*stop)(struct net_device *dev); /* if open & !DCD */ void (*detach)(struct net_device *dev); - int (*ioctl)(struct net_device *dev, struct ifreq *ifr); + int (*ioctl)(struct net_device *dev, struct if_settings *ifs); __be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev); int (*netif_rx)(struct sk_buff *skb); netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -54,7 +54,7 @@ typedef struct hdlc_device { /* Exported from hdlc module */ /* Called by hardware driver when a user requests HDLC service */ -int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs); /* Must be used by hardware driver on module startup/exit */ #define register_hdlc_device(dev) register_netdev(dev) diff --git a/include/linux/hdlcdrv.h b/include/linux/hdlcdrv.h index d4d633a49d36..5d70c3f98f5b 100644 --- a/include/linux/hdlcdrv.h +++ b/include/linux/hdlcdrv.h @@ -79,7 +79,7 @@ struct hdlcdrv_ops { */ int (*open)(struct net_device *); int (*close)(struct net_device *); - int (*ioctl)(struct net_device *, struct ifreq *, + int (*ioctl)(struct net_device *, void __user *, struct hdlcdrv_ioctl *, int); }; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b651c5e32a28..509e18c7e740 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -57,10 +57,16 @@ struct br_ip_list { #define BR_MRP_AWARE BIT(17) #define BR_MRP_LOST_CONT BIT(18) #define BR_MRP_LOST_IN_CONT BIT(19) +#define BR_TX_FWD_OFFLOAD BIT(20) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); +struct net_bridge; +void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, + unsigned int cmd, struct ifreq *ifr, + void __user *uarg)); +int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, + struct ifreq *ifr, void __user *uarg); #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, @@ -70,9 +76,6 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_has_router_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); bool br_multicast_router(const struct net_device *dev); -int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -104,13 +107,6 @@ static inline bool br_multicast_router(const struct net_device *dev) { return false; } -static inline int br_mdb_replay(const struct net_device *br_dev, - const struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) @@ -120,9 +116,8 @@ int br_vlan_get_pvid_rcu(const struct 
net_device *dev, u16 *p_pvid); int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto); int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); -int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); +int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -150,12 +145,10 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, return -EINVAL; } -static inline int br_vlan_replay(struct net_device *br_dev, - struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) +static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) { - return -EOPNOTSUPP; + return -EINVAL; } #endif @@ -167,8 +160,6 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); u8 br_port_get_stp_state(const struct net_device *dev); clock_t br_get_ageing_time(const struct net_device *br_dev); -int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -197,13 +188,6 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev) { return 0; } - -static inline int br_fdb_replay(const struct net_device *br_dev, - const struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb) -{ - return -EOPNOTSUPP; -} #endif #endif diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 64ce8cd1cfaf..93c262ecbdc9 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -41,9 +41,6 @@ struct ip_sf_socklist { __be32 sl_addr[]; }; -#define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ - (count) * sizeof(__be32)) - #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. 
Speed is not argument; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 53aa0343bf69..67e042932681 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -178,6 +178,15 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); +#ifdef CONFIG_INET +int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size); +#else +static inline int inet_gifconf(struct net_device *dev, char __user *buf, + int len, int size) +{ + return 0; +} +#endif void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); diff --git a/include/linux/ioam6.h b/include/linux/ioam6.h new file mode 100644 index 000000000000..94a24b36998f --- /dev/null +++ b/include/linux/ioam6.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_H +#define _LINUX_IOAM6_H + +#include <uapi/linux/ioam6.h> + +#endif /* _LINUX_IOAM6_H */ diff --git a/include/linux/ioam6_genl.h b/include/linux/ioam6_genl.h new file mode 100644 index 000000000000..176e67919de3 --- /dev/null +++ b/include/linux/ioam6_genl.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_GENL_H +#define _LINUX_IOAM6_GENL_H + +#include <uapi/linux/ioam6_genl.h> + +#endif /* _LINUX_IOAM6_GENL_H */ diff --git a/include/linux/ioam6_iptunnel.h b/include/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..07d9dfedd29d --- /dev/null +++ b/include/linux/ioam6_iptunnel.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_IPTUNNEL_H +#define _LINUX_IOAM6_IPTUNNEL_H + +#include <uapi/linux/ioam6_iptunnel.h> + +#endif /* _LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 70b2ad3b9884..ef4a69865737 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -76,6 +76,9 @@ struct ipv6_devconf { __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; + __u32 ioam6_id; + __u32 ioam6_id_wide; + __u8 ioam6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5e08468854db..c493a80cb453 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -356,6 +356,7 @@ struct mhi_controller_config { * @fbc_download: MHI host needs to do complete image transfer (optional) * @wake_set: Device wakeup set flag * @irq_flags: irq flags passed to request_irq (optional) + * @mru: the default MRU for the MHI device * * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). 
For the fields marked as (optional) @@ -448,6 +449,7 @@ struct mhi_controller { bool fbc_download; bool wake_set; unsigned long irq_flags; + u32 mru; }; /** diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 77746f7e35b8..0106c67e8ccb 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,8 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b0009aa3647f..6bbae0c3bc0b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -921,7 +921,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 scatter_fcs[0x1]; u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_at_1c[0x2]; + u8 tunnel_lro_gre[0x1]; + u8 tunnel_lro_vxlan[0x1]; u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eaf5bb008aa9..135c943699d0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -295,18 +295,6 @@ enum netdev_state_t { }; -/* - * This structure holds boot-time configured netdevice settings. They - * are then used in the device probing. - */ -struct netdev_boot_setup { - char name[IFNAMSIZ]; - struct ifmap map; -}; -#define NETDEV_BOOT_SETUP_MAX 8 - -int __init netdev_boot_setup(char *str); - struct gro_list { struct list_head list; int count; @@ -1086,9 +1074,18 @@ struct netdev_net_notifier { * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user requests an ioctl which can't be handled by - * the generic interface code. If not defined ioctls return - * not supported error code. + * Old-style ioctl entry point. This is used internally by the + * appletalk and ieee802154 subsystems but is no longer called by + * the device ioctl handler. + * + * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Used by the bonding driver for its device specific ioctls: + * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE, + * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY + * + * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, + * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. 
This interface @@ -1361,6 +1358,15 @@ struct net_device_ops { int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + int (*ndo_eth_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*ndo_siocbond)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*ndo_siocwandev)(struct net_device *dev, + struct if_settings *ifs); + int (*ndo_siocdevprivate)(struct net_device *dev, + struct ifreq *ifr, + void __user *data, int cmd); int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); int (*ndo_change_mtu)(struct net_device *dev, @@ -1805,6 +1811,7 @@ enum netdev_ml_priv_type { * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer + * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -2092,6 +2099,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif +#if IS_ENABLED(CONFIG_MCTP) + struct mctp_dev __rcu *mctp_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) @@ -2917,7 +2927,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net) } int netdev_boot_setup_check(struct net_device *dev); -unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); @@ -3289,14 +3298,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, - int len, int size); -int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -static inline int unregister_gifconf(unsigned int family) -{ - return register_gifconf(family, NULL); -} - #ifdef CONFIG_NET_FLOW_LIMIT #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ struct sd_flow_limit { @@ -3915,6 +3916,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, return 0; } #endif +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq); static inline struct netdev_rx_queue * __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) @@ -3984,6 +3987,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); } +u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); @@ -4012,10 +4017,12 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg); +int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, - bool *need_copyout); -int dev_ifconf(struct net *net, struct ifconf *, int); -int dev_ethtool(struct net *net, struct ifreq *); + void __user *data, bool *need_copyout); +int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const 
struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); @@ -4136,11 +4143,13 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_dec(*dev->pcpu_refcnt); + this_cpu_dec(*dev->pcpu_refcnt); #else - refcount_dec(&dev->dev_refcnt); + refcount_dec(&dev->dev_refcnt); #endif + } } /** @@ -4151,11 +4160,13 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_inc(*dev->pcpu_refcnt); + this_cpu_inc(*dev->pcpu_refcnt); #else - refcount_inc(&dev->dev_refcnt); + refcount_inc(&dev->dev_refcnt); #endif + } } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --git a/include/linux/phy.h b/include/linux/phy.h index 3b80dc3ed68b..736e1d1a47c4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1431,6 +1431,7 @@ static inline int phy_device_register(struct phy_device *phy) static inline void phy_device_free(struct phy_device *phydev) { } #endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); +int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8d07d88641..c64119aa2e60 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -42,6 +42,7 @@ struct backing_dev_info; struct bio_list; struct blk_plug; struct bpf_local_storage; +struct bpf_run_ctx; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1379,6 +1380,8 @@ struct task_struct { #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; + /* Used for BPF run context */ + struct bpf_run_ctx *bpf_ctx; #endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b2db9cd9a73f..783cc2368bb1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t; * CHECKSUM_UNNECESSARY (max 3) * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB + * @slow_gro: state present at GRO time, slower prepare step required * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking @@ -863,13 +864,14 @@ struct sk_buff { __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; #endif -#ifdef CONFIG_NET_REDIRECT __u8 redirected:1; +#ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif + __u8 slow_gro:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } @@ -1179,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, int pskb_expand_head(struct sk_buff *skb, int 
nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, @@ -4216,6 +4221,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb) static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) + skb->slow_gro |= !!nfct; skb->_nfct = nfct; #endif } @@ -4375,6 +4381,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(dst)); #endif + dst->slow_gro = src->slow_gro; __nf_copy(dst, src, true); } @@ -4664,17 +4671,13 @@ static inline __wsum lco_csum(struct sk_buff *skb) static inline bool skb_is_redirected(const struct sk_buff *skb) { -#ifdef CONFIG_NET_REDIRECT return skb->redirected; -#else - return false; -#endif } static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) { -#ifdef CONFIG_NET_REDIRECT skb->redirected = 1; +#ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) skb->tstamp = 0; @@ -4683,9 +4686,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) static inline void skb_reset_redirect(struct sk_buff *skb) { -#ifdef CONFIG_NET_REDIRECT skb->redirected = 0; -#endif } static inline bool skb_csum_is_sctp(struct sk_buff *skb) diff --git a/include/linux/socket.h b/include/linux/socket.h index 0d8e3dcb7f88..fd9ce51582d8 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -223,8 +223,11 @@ struct ucred { * reuses AF_INET address family */ #define AF_XDP 44 /* XDP sockets */ +#define AF_MCTP 45 /* Management component + * transport protocol + */ -#define AF_MAX 45 /* For now.. */ +#define AF_MAX 46 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -274,6 +277,7 @@ struct ucred { #define PF_QIPCRTR AF_QIPCRTR #define PF_SMC AF_SMC #define PF_XDP AF_XDP +#define PF_MCTP AF_MCTP #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h index 20d310331eb5..46b15e2aaefb 100644 --- a/include/linux/typecheck.h +++ b/include/linux/typecheck.h @@ -22,4 +22,13 @@ (void)__tmp; \ }) +/* + * Check at compile time that something is a pointer type. 
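A minimal usage sketch for the typecheck_pointer() macro that this comment introduces (the definition follows just below). The macro evaluates to 1, so it can sit inside a larger expression; its only job is to break the build when the argument is not a pointer, because sizeof(*__dummy) cannot be formed for a non-pointer type. The example() function and the sk_buff argument are illustrative, not from the patch.

#include <linux/typecheck.h>
#include <linux/skbuff.h>

static void example(struct sk_buff *skb)
{
	/* Compiles: skb is a pointer, so *__dummy is well-formed. */
	typecheck_pointer(skb);

	/*
	 * Would not compile: skb->len is a plain integer, and the
	 * macro would try to take sizeof(*__dummy) on it.
	 *
	 * typecheck_pointer(skb->len);
	 */
}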
+ */ +#define typecheck_pointer(x) \ +({ typeof(x) __dummy; \ + (void)sizeof(*__dummy); \ + 1; \ +}) + #endif /* TYPECHECK_H_INCLUDED */ diff --git a/include/net/Space.h b/include/net/Space.h index 9cce0d80d37a..08ca9cef0213 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit); struct net_device *wd_probe(int unit); struct net_device *ne_probe(int unit); struct net_device *fmv18x_probe(int unit); -struct net_device *i82596_probe(int unit); struct net_device *ni65_probe(int unit); struct net_device *sonic_probe(int unit); struct net_device *smc_init(int unit); -struct net_device *atarilance_probe(int unit); -struct net_device *sun3lance_probe(int unit); -struct net_device *sun3_82586_probe(int unit); -struct net_device *apne_probe(int unit); struct net_device *cs89x0_probe(int unit); -struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); struct net_device *cops_probe(int unit); -struct net_device *ltpc_probe(void); /* Fibre Channel adapters */ int iph5526_probe(struct net_device *dev); - -/* SBNI adapters */ -int sbni_probe(int unit); diff --git a/include/net/act_api.h b/include/net/act_api.h index 086b291e9530..f19f7f4a463c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -58,6 +58,14 @@ struct tc_action { #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \ TCA_ACT_HW_STATS_DELAYED) +/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */ +#define TCA_ACT_FLAGS_USER_BITS 16 +#define TCA_ACT_FLAGS_USER_MASK 0xffff +#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS) +#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1)) +#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) +#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) + /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. 
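The TCA_ACT_FLAGS_* block a few lines above folds what used to be separate ovr/bind/rtnl_held parameters (see the ops->init() and tcf_action_init() signature changes in the hunks below) into a single u32: the low TCA_ACT_FLAGS_USER_BITS bits remain reserved for flags arriving from user space, such as TCA_ACT_FLAGS_NO_PERCPU_STATS in the uapi header, while kernel-internal state starts at bit 16. A decoding sketch, with example_decode() as an invented name:

#include <linux/printk.h>
#include <net/act_api.h>

static void example_decode(u32 flags)
{
	/* bits 0..15 arrived from user space via netlink */
	u32 user_flags = flags & TCA_ACT_FLAGS_USER_MASK;

	/* bits 16 and up are kernel-internal, replacing the old
	 * ovr/bind/rtnl_held arguments of the action init paths
	 */
	pr_debug("user=%#x police=%d bind=%d replace=%d rtnl=%d\n",
		 user_flags,
		 !!(flags & TCA_ACT_FLAGS_POLICE),
		 !!(flags & TCA_ACT_FLAGS_BIND),
		 !!(flags & TCA_ACT_FLAGS_REPLACE),
		 !(flags & TCA_ACT_FLAGS_NO_RTNL));
}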
*/ @@ -99,8 +107,8 @@ struct tc_action_ops { void (*cleanup)(struct tc_action *); int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **act, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, + struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack); -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + u32 flags, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack); + u32 flags, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f42fdddecd41..4757d7f53f13 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -70,6 +70,9 @@ struct unix_sock { struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + struct sk_buff *oob_skb; +#endif }; static inline struct unix_sock *unix_sk(const struct sock *sk) @@ -82,6 +85,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk) long unix_inq_len(struct sock *sk); long unix_outq_len(struct sock *sk); +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags); #ifdef CONFIG_SYSCTL int unix_sysctl_register(struct net *net); void unix_sysctl_unregister(struct net *net); @@ -89,4 +94,14 @@ void unix_sysctl_unregister(struct net *net); static inline int unix_sysctl_register(struct net *net) { return 0; } static inline void unix_sysctl_unregister(struct net *net) {} #endif + +#ifdef CONFIG_BPF_SYSCALL +extern struct proto unix_proto; + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +void __init unix_bpf_build_proto(void); +#else +static inline void __init unix_bpf_build_proto(void) +{} +#endif #endif diff --git a/include/net/ax88796.h b/include/net/ax88796.h index aa52b2e8ff7b..2ed23a368602 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -38,4 +38,7 @@ struct ax_plat_data { int (*check_irq)(struct platform_device *pdev); }; +/* exported from ax88796.c for xsurf100.c */ +extern void ax_NS8390_reinit(struct net_device *dev); + #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d..38785d48baff 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -303,6 +303,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, int bond_3ad_lacpdu_recv(const 
struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 9d382f2f0bc5..e64833a674eb 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, + BOND_OPT_LACP_ACTIVE, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 625d9c72dee3..46df47004803 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -129,6 +129,7 @@ struct bond_params { int updelay; int downdelay; int peer_notif_delay; + int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; diff --git a/include/net/compat.h b/include/net/compat.h index 84805bdc4435..595fee069b82 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -71,13 +71,26 @@ struct compat_group_source_req { } __packed; struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); + union { + struct { + __u32 gf_interface_aux; + struct __kernel_sockaddr_storage gf_group_aux + __aligned(4); + __u32 gf_fmode_aux; + __u32 gf_numsrc_aux; + struct __kernel_sockaddr_storage gf_slist[1] + __aligned(4); + } __packed; + struct { + __u32 gf_interface; + struct __kernel_sockaddr_storage gf_group + __aligned(4); + __u32 gf_fmode; + __u32 gf_numsrc; + struct __kernel_sockaddr_storage gf_slist_flex[] + __aligned(4); + } __packed; + }; } __packed; #endif /* NET_COMPAT_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 57b738b78073..08f4c6191e72 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -55,8 +55,7 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ u8 reload_failed:1, - reload_enabled:1, - registered:1; + reload_enabled:1; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -158,7 +157,6 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; unsigned int index; - bool registered; spinlock_t type_lock; /* Protects type and type_dev * pointer consistency. */ @@ -1542,8 +1540,18 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev) struct ib_device; struct net *devlink_net(const struct devlink *devlink); -void devlink_net_set(struct devlink *devlink, struct net *net); -struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); +/* This call is intended for software devices that can create + * devlink instances in other namespaces than init_net. + * + * Drivers that operate on real HW must use devlink_alloc() instead. 
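A sketch of the split being introduced here, as it might look from a software-only driver; the example_* names and the empty ops are placeholders, not from the patch. Such a driver passes its target namespace to devlink_alloc_ns(), while hardware drivers keep calling devlink_alloc(), which the hunk below turns into a static inline wrapper pinned to init_net.

static const struct devlink_ops example_devlink_ops;	/* placeholder ops */

struct example_priv {
	int dummy;
};

static struct devlink *example_create(struct net *net, struct device *dev)
{
	struct devlink *dl;

	/* software device: the instance may live outside init_net */
	dl = devlink_alloc_ns(&example_devlink_ops,
			      sizeof(struct example_priv), net);
	if (!dl)
		return NULL;

	if (devlink_register(dl, dev)) {
		devlink_free(dl);
		return NULL;
	}
	return dl;
}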
+ */ +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net); +static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, + size_t priv_size) +{ + return devlink_alloc_ns(ops, priv_size, &init_net); +} int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index ccc6e9df178b..ddd6565957b3 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -29,7 +29,7 @@ struct dn_fib_nh { struct dn_fib_info { struct dn_fib_info *fib_next; struct dn_fib_info *fib_prev; - int fib_treeref; + refcount_t fib_treeref; refcount_t fib_clntref; int fib_dead; unsigned int fib_flags; diff --git a/include/net/dsa.h b/include/net/dsa.h index 33f40c1ec379..7cc9507282d3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -79,20 +79,13 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, }; -struct packet_type; struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); - /* Used to determine which traffic should match the DSA filter in - * eth_type_trans, and which, if any, should bypass it and be processed - * as regular on the master net device. - */ - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -111,8 +104,8 @@ struct dsa_device_ops { * function pointers. */ struct dsa_netdevice_ops { - int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, - int cmd); + int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, + int cmd); }; #define DSA_TAG_DRIVER_ALIAS "dsa_tag-" @@ -159,6 +152,12 @@ struct dsa_switch_tree { */ struct net_device **lags; unsigned int lags_len; + + /* Track the largest switch index within a tree */ + unsigned int last_switch; + + /* Track the bridges with forwarding offload enabled */ + unsigned long fwd_offloading_bridges; }; #define dsa_lags_foreach_id(_id, _dst) \ @@ -238,9 +237,7 @@ struct dsa_port { /* Copies for faster access in master receive hot path */ struct dsa_switch_tree *dst; - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -259,6 +256,7 @@ struct dsa_port { bool vlan_filtering; u8 stp_state; struct net_device *bridge_dev; + int bridge_num; struct devlink_port devlink_port; bool devlink_port_setup; struct phylink *pl; @@ -352,6 +350,9 @@ struct dsa_switch { unsigned int ageing_time_min; unsigned int ageing_time_max; + /* Storage for drivers using tag_8021q */ + struct dsa_8021q_context *tag_8021q_ctx; + /* devlink used to represent this switch device */ struct devlink *devlink; @@ -407,6 +408,12 @@ struct dsa_switch { */ unsigned int num_lag_ids; + /* Drivers that support bridge forwarding offload should set this to + * the maximum number of bridges spanning the same switch tree that can + * be offloaded. 
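How a driver might opt in to the bridge TX forwarding offload machinery added in this hunk, as a sketch only (the foo_* names, including the foo_hw_set_fwd_domain() helper, are invented): the driver advertises its limit at setup time, and the core then hands out a bridge_num below that limit through the .port_bridge_tx_fwd_offload hook declared further down in this file.

static int foo_port_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
					  struct net_device *bridge,
					  int bridge_num)
{
	/* e.g. program a forwarding domain keyed on bridge_num, so the
	 * tagger can address the whole bridging domain in one TX frame;
	 * foo_hw_set_fwd_domain() is hypothetical
	 */
	return foo_hw_set_fwd_domain(ds, port, bridge_num);
}

static int foo_setup(struct dsa_switch *ds)
{
	/* hardware-specific limit on offloadable bridges per tree */
	ds->num_fwd_offloading_bridges = 4;
	return 0;
}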
+ */ + unsigned int num_fwd_offloading_bridges; + size_t num_ports; }; @@ -690,6 +697,14 @@ struct dsa_switch_ops { struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct net_device *bridge); + /* Called right after .port_bridge_join() */ + int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); + /* Called right before .port_bridge_leave() */ + void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -869,6 +884,13 @@ struct dsa_switch_ops { const struct switchdev_obj_ring_role_mrp *mrp); int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); + + /* + * tag_8021q operations + */ + int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, + u16 flags); + int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -954,15 +976,6 @@ static inline bool netdev_uses_dsa(const struct net_device *dev) return false; } -static inline bool dsa_can_decode(const struct sk_buff *skb, - struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_NET_DSA) - return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev); -#endif - return false; -} - /* All DSA tags that push the EtherType to the right (basically all except tail * tags, which don't break dissection) can be treated the same from the * perspective of the flow dissector. @@ -1003,8 +1016,8 @@ static inline int __dsa_netdevice_ops_check(struct net_device *dev) return 0; } -static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) { const struct dsa_netdevice_ops *ops; int err; @@ -1015,11 +1028,11 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, ops = dev->dsa_ptr->netdev_ops; - return ops->ndo_do_ioctl(dev, ifr, cmd); + return ops->ndo_eth_ioctl(dev, ifr, cmd); } #else -static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) { return -EOPNOTSUPP; } diff --git a/include/net/dst.h b/include/net/dst.h index 75b1e734e9c2..a057319aabef 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb) static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) { + nskb->slow_gro |= !!refdst; nskb->_skb_refdst = refdst; if (!(nskb->_skb_refdst & SKB_DST_NOREF)) dst_clone(skb_dst(nskb)); @@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb) dst = NULL; skb->_skb_refdst = (unsigned long)dst; + skb->slow_gro |= !!dst; } return skb->_skb_refdst != 0UL; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 71bb4cc4d05d..42235c178b06 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -82,9 +82,6 @@ struct ip6_sf_socklist { struct in6_addr sl_addr[]; }; -#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \ - (count) * sizeof(struct in6_addr)) - #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ca6a3ea9057e..f72ec113ae56 
100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -160,6 +160,12 @@ struct inet_hashinfo { ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_continue(__icsk) \ + hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) + +#define inet_lhash2_for_each_icsk(__icsk, list) \ + hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) + #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) diff --git a/include/net/ioam6.h b/include/net/ioam6.h new file mode 100644 index 000000000000..3c2993bc48c8 --- /dev/null +++ b/include/net/ioam6.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _NET_IOAM6_H +#define _NET_IOAM6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/ioam6.h> +#include <linux/rhashtable-types.h> + +struct ioam6_namespace { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_schema __rcu *schema; + + __be16 id; + __be32 data; + __be64 data_wide; +}; + +struct ioam6_schema { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_namespace __rcu *ns; + + u32 id; + int len; + __be32 hdr; + + u8 data[0]; +}; + +struct ioam6_pernet_data { + struct mutex lock; + struct rhashtable namespaces; + struct rhashtable schemas; +}; + +static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + return net->ipv6.ioam6_data; +#else + return NULL; +#endif +} + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id); +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace); + +int ioam6_init(void); +void ioam6_exit(void); + +int ioam6_iptunnel_init(void); +void ioam6_iptunnel_exit(void); + +#endif /* _NET_IOAM6_H */ diff --git a/include/net/ip.h b/include/net/ip.h index d9683bef8684..9192444f2964 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -436,18 +436,32 @@ static inline bool ip_sk_ignore_df(const struct sock *sk) static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net *net = dev_net(dst->dev); unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || - !forwarding) - return dst_mtu(dst); + !forwarding) { + mtu = rt->rt_pmtu; + if (mtu && time_before(jiffies, rt->dst.expires)) + goto out; + } /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); - if (!mtu) - mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); + if (mtu) + goto out; + + mtu = READ_ONCE(dst->dev->mtu); + + if (unlikely(ip_mtu_locked(dst))) { + if (rt->rt_uses_gateway && mtu > 576) + mtu = 576; + } + +out: + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0bf09a9bca4e..5efd0b71dc67 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -316,12 +316,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info * !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } -static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst, + bool forwarding) { struct inet6_dev *idev; unsigned int mtu; - if 
(dst_metric_locked(dst, RTAX_MTU)) { + if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3ab2563b1a23..21c5386d4a6d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -133,7 +133,7 @@ struct fib_info { struct hlist_node fib_lhash; struct list_head nh_list; struct net *fib_net; - int fib_treeref; + refcount_t fib_treeref; refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 548b65bd3973..bc3b13ec93c9 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -270,7 +270,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); -int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/net/mctp.h b/include/net/mctp.h new file mode 100644 index 000000000000..54bbe042c973 --- /dev/null +++ b/include/net/mctp.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Management Component Transport Protocol (MCTP) + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __NET_MCTP_H +#define __NET_MCTP_H + +#include <linux/bits.h> +#include <linux/mctp.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +/* MCTP packet definitions */ +struct mctp_hdr { + u8 ver; + u8 dest; + u8 src; + u8 flags_seq_tag; +}; + +#define MCTP_VER_MIN 1 +#define MCTP_VER_MAX 1 + +/* Definitions for flags_seq_tag field */ +#define MCTP_HDR_FLAG_SOM BIT(7) +#define MCTP_HDR_FLAG_EOM BIT(6) +#define MCTP_HDR_FLAG_TO BIT(3) +#define MCTP_HDR_FLAGS GENMASK(5, 3) +#define MCTP_HDR_SEQ_SHIFT 4 +#define MCTP_HDR_SEQ_MASK GENMASK(1, 0) +#define MCTP_HDR_TAG_SHIFT 0 +#define MCTP_HDR_TAG_MASK GENMASK(2, 0) + +#define MCTP_HEADER_MAXLEN 4 + +#define MCTP_INITIAL_DEFAULT_NET 1 + +static inline bool mctp_address_ok(mctp_eid_t eid) +{ + return eid >= 8 && eid < 255; +} + +static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) +{ + return (struct mctp_hdr *)skb_network_header(skb); +} + +/* socket implementation */ +struct mctp_sock { + struct sock sk; + + /* bind() params */ + int bind_net; + mctp_eid_t bind_addr; + __u8 bind_type; + + /* list of mctp_sk_key, for incoming tag lookup. updates protected + * by sk->net->keys_lock + */ + struct hlist_head keys; +}; + +/* Key for matching incoming packets to sockets or reassembly contexts. + * Packets are matched on (src,dest,tag). + * + * Lifetime requirements: + * + * - keys are free()ed via RCU + * + * - a mctp_sk_key contains a reference to a struct sock; this is valid + * for the life of the key. On sock destruction (through unhash), the key is + * removed from lists (see below), and will not be observable after a RCU + * grace period. + * + * any RX occurring within that grace period may still queue to the socket, + * but will hit the SOCK_DEAD case before the socket is freed. 
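The MCTP_HDR_* constants near the top of this hunk pack SOM/EOM, the tag-owner bit, a 2-bit sequence number and a 3-bit tag into the single flags_seq_tag byte. A sketch of composing that byte for an unfragmented, tag-owning request; the helper name is illustrative, not part of the patch.

static u8 example_pack_flags_seq_tag(u8 seq, u8 tag, bool tag_owner)
{
	/* single-fragment message: both start- and end-of-message set */
	u8 fst = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM;

	fst |= (seq & MCTP_HDR_SEQ_MASK) << MCTP_HDR_SEQ_SHIFT;
	fst |= (tag & MCTP_HDR_TAG_MASK) << MCTP_HDR_TAG_SHIFT;
	if (tag_owner)
		fst |= MCTP_HDR_FLAG_TO;	/* requester owns the tag */

	return fst;
}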
+ * + * - these mctp_sk_keys appear on two lists: + * 1) the struct mctp_sock->keys list + * 2) the struct netns_mctp->keys list + * + * updates to either list are performed under the netns_mctp->keys + * lock. + * + * - a key may have a sk_buff attached as part of an in-progress message + * reassembly (->reasm_head). The reassembly context is protected by + * reasm_lock, which may be acquired with the keys lock (above) held, if + * necessary. Consequently, keys lock *cannot* be acquired with the + * reasm_lock held. + * + * - there are two destruction paths for a mctp_sk_key: + * + * - through socket unhash (see mctp_sk_unhash). This performs the list + * removal under keys_lock. + * + * - where a key is established to receive a reply message: after receiving + * the (complete) reply, or during reassembly errors. Here, we clean up + * the reassembly context (marking reasm_dead, to prevent another from + * starting), and remove the socket from the netns & socket lists. + */ +struct mctp_sk_key { + mctp_eid_t peer_addr; + mctp_eid_t local_addr; + __u8 tag; /* incoming tag match; invert TO for local */ + + /* we hold a ref to sk when set */ + struct sock *sk; + + /* routing lookup list */ + struct hlist_node hlist; + + /* per-socket list */ + struct hlist_node sklist; + + /* incoming fragment reassembly context */ + spinlock_t reasm_lock; + struct sk_buff *reasm_head; + struct sk_buff **reasm_tailp; + bool reasm_dead; + u8 last_seq; + + struct rcu_head rcu; +}; + +struct mctp_skb_cb { + unsigned int magic; + unsigned int net; + mctp_eid_t src; +}; + +/* skb control-block accessors with a little extra debugging for initial + * development. + * + * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb + * with mctp_cb(). + * + * __mctp_cb() is only for the initial ingress code; we should see ->magic set + * at all times after this. + */ +static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = (void *)skb->cb; + + cb->magic = 0x4d435450; + return cb; +} + +static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = (void *)skb->cb; + + WARN_ON(cb->magic != 0x4d435450); + return (void *)(skb->cb); +} + +/* Route definition. + * + * These are held in the pernet->mctp.routes list, with RCU protection for + * removed routes. We hold a reference to the netdev; routes need to be + * dropped on NETDEV_UNREGISTER events. + * + * Updates to the route table are performed under rtnl; all reads under RCU, + * so routes cannot be referenced over a RCU grace period. Specifically: A + * caller cannot block between mctp_route_lookup and passing the route to + * mctp_do_route. 
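A hypothetical caller honouring the rule just stated: the lookup and the hand-off to mctp_do_route() stay inside one RCU read-side section, with no blocking in between. Error handling is simplified and the function name is invented.

static int example_route_and_send(struct net *net, unsigned int dnet,
				  mctp_eid_t daddr, struct sk_buff *skb)
{
	struct mctp_route *rt;
	int rc;

	rcu_read_lock();
	rt = mctp_route_lookup(net, dnet, daddr);
	if (!rt) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	/* must not sleep between the lookup and the hand-off */
	rc = mctp_do_route(rt, skb);
	rcu_read_unlock();

	return rc;
}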
+ */ +struct mctp_route { + mctp_eid_t min, max; + + struct mctp_dev *dev; + unsigned int mtu; + int (*output)(struct mctp_route *route, + struct sk_buff *skb); + + struct list_head list; + refcount_t refs; + struct rcu_head rcu; +}; + +/* route interfaces */ +struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr); + +int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb); + +int mctp_local_output(struct sock *sk, struct mctp_route *rt, + struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); + +/* routing <--> device interface */ +unsigned int mctp_default_net(struct net *net); +int mctp_default_net_set(struct net *net, unsigned int index); +int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr); +int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr); +void mctp_route_remove_dev(struct mctp_dev *mdev); + +/* neighbour definitions */ +enum mctp_neigh_source { + MCTP_NEIGH_STATIC, + MCTP_NEIGH_DISCOVER, +}; + +struct mctp_neigh { + struct mctp_dev *dev; + mctp_eid_t eid; + enum mctp_neigh_source source; + + unsigned char ha[MAX_ADDR_LEN]; + + struct list_head list; + struct rcu_head rcu; +}; + +int mctp_neigh_init(void); +void mctp_neigh_exit(void); + +// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN +int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid, + void *ret_hwaddr); +void mctp_neigh_remove_dev(struct mctp_dev *mdev); + +int mctp_routes_init(void); +void mctp_routes_exit(void); + +void mctp_device_init(void); +void mctp_device_exit(void); + +#endif /* __NET_MCTP_H */ diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h new file mode 100644 index 000000000000..57e773ff08bb --- /dev/null +++ b/include/net/mctpdevice.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Management Component Transport Protocol (MCTP) - device + * definitions. + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __NET_MCTPDEVICE_H +#define __NET_MCTPDEVICE_H + +#include <linux/list.h> +#include <linux/types.h> +#include <linux/refcount.h> + +struct mctp_dev { + struct net_device *dev; + + unsigned int net; + + /* Only modified under RTNL. 
Reads have addrs_lock held */ + u8 *addrs; + size_t num_addrs; + spinlock_t addrs_lock; + + struct rcu_head rcu; +}; + +#define MCTP_INITIAL_DEFAULT_NET 1 + +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); +struct mctp_dev *__mctp_dev_get(const struct net_device *dev); +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); + +#endif /* __NET_MCTPDEVICE_H */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 12cf6d7ea62c..cc54750dd3db 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -34,6 +34,7 @@ #include <net/netns/xdp.h> #include <net/netns/smc.h> #include <net/netns/bpf.h> +#include <net/netns/mctp.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -167,6 +168,9 @@ struct net { #ifdef CONFIG_XDP_SOCKETS struct netns_xdp xdp; #endif +#if IS_ENABLED(CONFIG_MCTP) + struct netns_mctp mctp; +#endif #if IS_ENABLED(CONFIG_CRYPTO_USER) struct sock *crypto_nlsk; #endif diff --git a/include/net/netlink.h b/include/net/netlink.h index 1ceec518ab49..7a2a9d3144ba 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -885,7 +885,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, */ static inline int nlmsg_report(const struct nlmsghdr *nlh) { - return !!(nlh->nlmsg_flags & NLM_F_ECHO); + return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0; } /** diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b8620519eace..2f65701a43c9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -174,7 +174,6 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; const struct tcp_congestion_ops __rcu *tcp_congestion_control; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - spinlock_t tcp_fastopen_ctx_lock; unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index bde0b7adb4a3..a4b550380316 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,6 +51,8 @@ struct netns_sysctl_ipv6 { int max_dst_opts_len; int max_hbh_opts_len; int seg6_flowlabel; + u32 ioam6_id; + u64 ioam6_id_wide; bool skip_notify_on_dev_down; u8 fib_notify_on_flag_change; }; @@ -110,6 +112,7 @@ struct netns_ipv6 { spinlock_t lock; u32 seq; } ip6addrlbl_table; + struct ioam6_pernet_data *ioam6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h new file mode 100644 index 000000000000..acedef12a35e --- /dev/null +++ b/include/net/netns/mctp.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MCTP per-net structures + */ + +#ifndef __NETNS_MCTP_H__ +#define __NETNS_MCTP_H__ + +#include <linux/types.h> + +struct netns_mctp { + /* Only updated under RTNL, entries freed via RCU */ + struct list_head routes; + + /* Bound sockets: list of sockets bound by type. + * This list is updated from non-atomic contexts (under bind_lock), + * and read (under rcu) in packet rx + */ + struct mutex bind_lock; + struct hlist_head binds; + + /* tag allocations. 
This list is read and updated from atomic contexts, + * but elements are free()ed after a RCU grace-period + */ + spinlock_t keys_lock; + struct hlist_head keys; + + /* MCTP network */ + unsigned int default_net; + + /* neighbour table */ + struct mutex neigh_lock; + struct list_head neighbours; +}; + +#endif /* __NETNS_MCTP_H__ */ diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 963db96bcbbb..bb3e8fdc0692 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -191,7 +191,7 @@ struct digital_poll_tech { struct nfc_digital_dev { struct nfc_dev *nfc_dev; - struct nfc_digital_ops *ops; + const struct nfc_digital_ops *ops; u32 protocols; @@ -236,7 +236,7 @@ struct nfc_digital_dev { void (*skb_add_crc)(struct sk_buff *skb); }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index b35f37a57686..756c11084f65 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -118,7 +118,7 @@ struct nfc_hci_dev { struct sk_buff_head msg_rx_queue; - struct nfc_hci_ops *ops; + const struct nfc_hci_ops *ops; struct nfc_llc *llc; @@ -151,7 +151,7 @@ struct nfc_hci_dev { }; /* hci device allocation */ -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, @@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 1df0f8074c9d..a964daedc17b 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -82,10 +82,10 @@ struct nci_ops { void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); - struct nci_driver_ops *prop_ops; + const struct nci_driver_ops *prop_ops; size_t n_prop_ops; - struct nci_driver_ops *core_ops; + const struct nci_driver_ops *core_ops; size_t n_core_ops; }; @@ -194,7 +194,7 @@ struct nci_hci_dev { /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; - struct nci_ops *ops; + const struct nci_ops *ops; struct nci_hci_dev *hci_dev; int tx_headroom; @@ -267,7 +267,7 @@ struct nci_dev { }; /* ----- NCI Devices ----- */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom); @@ -276,25 +276,27 @@ int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout); -int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); -int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload); + const void *opt), + const void *opt, __u32 timeout); +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, + const __u8 *payload); +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, + const __u8 
*payload); int nci_core_reset(struct nci_dev *ndev); int nci_core_init(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb); -int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, - struct core_conn_create_dest_spec_params *params); + const struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); -int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, +int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); @@ -343,7 +345,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) } static inline int nci_set_vendor_cmds(struct nci_dev *ndev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); @@ -360,7 +362,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, @@ -378,7 +380,7 @@ void nci_req_complete(struct nci_dev *ndev, int result); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id); int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, - struct dest_spec_params *params); + const struct dest_spec_params *params); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 2cd3a261bcbc..5dee575fbe86 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -188,17 +188,17 @@ struct nfc_dev { struct rfkill *rfkill; - struct nfc_vendor_cmd *vendor_cmds; + const struct nfc_vendor_cmd *vendor_cmds; int n_vendor_cmds; - struct nfc_ops *ops; + const struct nfc_ops *ops; struct genl_info *cur_cmd_info; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) extern struct class nfc_class; -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom); @@ -245,7 +245,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) * * @dev: The nfc device */ -static inline void *nfc_get_drvdata(struct nfc_dev *dev) +static inline void *nfc_get_drvdata(const struct nfc_dev *dev) { return dev_get_drvdata(&dev->dev); } @@ -255,7 +255,7 @@ static inline void *nfc_get_drvdata(struct nfc_dev *dev) * * @dev: The nfc device whose name to return */ -static inline const char *nfc_device_name(struct nfc_dev *dev) +static inline const char *nfc_device_name(const struct nfc_dev *dev) { return 
dev_name(&dev->dev); } @@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, - u8 *gt, u8 gt_len); + const u8 *gt, u8 gt_len); u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name, @@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len); + const u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); @@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { if (dev->vendor_cmds || dev->n_vendor_cmds) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 298a8d10168b..8fb47fc61097 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -76,12 +76,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); -int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, struct tcf_result *res, - bool compat_mode); +int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -138,20 +136,14 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb, { } -static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, +static inline int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { return TC_ACT_UNSPEC; } -static inline int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) -{ - return TC_ACT_UNSPEC; -} - #endif static inline unsigned long @@ -327,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, bool rtnl_held, + struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 384e800665f2..9f48733bfd21 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -153,7 +153,8 @@ struct rtnl_af_ops { u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, - const struct nlattr *attr); + const struct nlattr *attr, + struct netlink_ext_ack *extack); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9ed33e6840bd..c0069ac00e62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -357,7 +357,7 @@ struct tcf_proto_ops { int 
(*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, bool, + void **, u32, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, diff --git a/include/net/sock.h b/include/net/sock.h index f23cb259b0e2..6e761451c927 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,6 +68,7 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/l3mdev.h> +#include <uapi/linux/socket.h> /* * This structure really needs to be cleaned up. @@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk) #define RCV_SHUTDOWN 1 #define SEND_SHUTDOWN 2 -#define SOCK_SNDBUF_LOCK 1 -#define SOCK_RCVBUF_LOCK 2 #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 @@ -2249,6 +2248,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline void skb_prepare_for_gro(struct sk_buff *skb) +{ + if (skb->destructor != sock_wfree) { + skb_orphan(skb); + return; + } + skb->slow_gro = 1; +} + void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e4cac9218ce1..60d806b6a5ae 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp { typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); +struct switchdev_brport { + struct net_device *dev; + const void *ctx; + struct notifier_block *atomic_nb; + struct notifier_block *blocking_nb; + bool tx_fwd_offload; +}; + enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, SWITCHDEV_FDB_DEL_TO_BRIDGE, @@ -197,6 +205,9 @@ enum switchdev_notifier_type { SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, SWITCHDEV_VXLAN_FDB_OFFLOADED, + + SWITCHDEV_BRPORT_OFFLOADED, + SWITCHDEV_BRPORT_UNOFFLOADED, }; struct switchdev_notifier_info { @@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info { bool handled; }; +struct switchdev_notifier_brport_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_brport brport; +}; + static inline struct net_device * switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { @@ -238,8 +254,25 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) return info->extack; } +static inline bool +switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info) +{ + return !fdb_info->added_by_user && !fdb_info->is_local; +} + #ifdef CONFIG_NET_SWITCHDEV +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, @@ -266,6 +299,30 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct 
net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), @@ -286,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct netlink_ext_ack *extack)); #else +static inline int +switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline void switchdev_deferred_process(void) { } @@ -350,6 +426,38 @@ call_switchdev_blocking_notifiers(unsigned long val, } static inline int +switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + return 0; +} + +static inline int +switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + return 0; +} + +static inline int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), diff --git a/include/net/tcp.h b/include/net/tcp.h index 784d5c3ef1c5..3166dc15d7d6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1958,7 +1958,6 @@ struct tcp_iter_state { struct seq_net_private p; enum tcp_seq_states state; struct sock *syn_wait_sk; - struct tcp_seq_afinfo *bpf_seq_afinfo; int bucket, offset, sbucket, num; loff_t last_pos; }; diff --git 
a/include/net/xdp.h b/include/net/xdp.h index 5533f0ab2afc..ad5b02dcb6f4 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) return unlikely(xdp->data_meta > xdp->data); } +static inline bool xdp_metalen_invalid(unsigned long metalen) +{ + return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); +} + struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index c3006c6b4a87..59c945b66f9c 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -54,6 +54,7 @@ TRACE_EVENT(qdisc_enqueue, TP_STRUCT__entry( __field(struct Qdisc *, qdisc) + __field(const struct netdev_queue *, txq) __field(void *, skbaddr) __field(int, ifindex) __field(u32, handle) @@ -62,6 +63,7 @@ TRACE_EVENT(qdisc_enqueue, TP_fast_assign( __entry->qdisc = qdisc; + __entry->txq = txq; __entry->skbaddr = skb; __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; __entry->handle = qdisc->handle; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index d588c244ec2f..1f0a2b4864e4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf9252c7381e..2db6925e04f4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -3249,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4780,6 +4777,76 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. 
+ * The user space should either hold a file descriptor to a map with timers + * or pin such a map in bpffs. When the map is unpinned or the file descriptor is + * closed, all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In that case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside a map element, the map + * owns the timer. bpf_timer_set_callback() will increment the refcnt + * of the BPF program to make sure that the callback_fn code stays valid. + * When the user space reference to a map reaches zero, all timers + * in the map are cancelled and the corresponding programs' refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If the map is pinned in + * bpffs, the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if the key/value layout matches across maps. + * Every bpf_timer_set_callback() call can install a different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer, which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get the address of the traced function (for tracing and kprobe programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4951,6 +5018,11 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6077,6 +6149,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write.
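[Editorial aside between file diffs: taken together, the timer helpers documented above compose into one pattern: embed struct bpf_timer in a map value, then init, set a callback, and arm it from program context. A minimal sketch, not part of this patch; it assumes libbpf's <bpf/bpf_helpers.h>, and names such as timer_map, struct elem and start_timer are illustrative. CLOCK_MONOTONIC (clockid 1) goes in the low bits of the bpf_timer_init() flags, as documented above.]

/* Hedged sketch of the bpf_timer API; not from this diff. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";	/* the timer helpers are gpl_only */

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct elem *val)
{
	/* Runs later in softirq context; call bpf_timer_start() again
	 * here to make the timer periodic.
	 */
	return 0;	/* the verifier requires a zero return */
}

SEC("xdp")
int start_timer(struct xdp_md *ctx)
{
	struct elem *val;
	int key = 0;

	val = bpf_map_lookup_elem(&timer_map, &key);
	if (!val)
		return XDP_PASS;
	/* -EBUSY from re-init on later packets is harmless in this sketch */
	bpf_timer_init(&val->t, &timer_map, 1 /* CLOCK_MONOTONIC */);
	bpf_timer_set_callback(&val->t, timer_cb);
	bpf_timer_start(&val->t, 100000000ULL /* 100 ms */, 0);
	return XDP_PASS;
}

[The loader's open map file descriptor is what keeps the map's usercnt nonzero, so the -EPERM cases described above do not trigger while the program is managed by a normal libbpf loader.]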
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index df6e821075c1..38936460f668 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -78,11 +78,20 @@ enum { enum { J1939_NLA_PAD, J1939_NLA_BYTES_ACKED, + J1939_NLA_TOTAL_SIZE, + J1939_NLA_PGN, + J1939_NLA_SRC_NAME, + J1939_NLA_DEST_NAME, + J1939_NLA_SRC_ADDR, + J1939_NLA_DEST_ADDR, }; enum { J1939_EE_INFO_NONE, J1939_EE_INFO_TX_ABORT, + J1939_EE_INFO_RX_RTS, + J1939_EE_INFO_RX_DPO, + J1939_EE_INFO_RX_ABORT, }; struct j1939_filter { diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index c3cc5a9e5eaf..4783af9fe520 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -54,6 +54,7 @@ #define ARPHRD_X25 271 /* CCITT X.25 */ #define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */ #define ARPHRD_CAN 280 /* Controller Area Network */ +#define ARPHRD_MCTP 290 #define ARPHRD_PPP 512 #define ARPHRD_CISCO 513 /* Cisco HDLC */ #define ARPHRD_HDLC ARPHRD_CISCO diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6b56a7549531..5aca85874447 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -479,16 +479,22 @@ enum { /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ #define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] * ... * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... + * } */ enum { BRIDGE_VLANDB_UNSPEC, BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, __BRIDGE_VLANDB_MAX, }; #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) @@ -538,6 +544,15 @@ enum { }; #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { @@ -629,6 +644,7 @@ enum { MDBA_ROUTER_PATTR_TYPE, MDBA_ROUTER_PATTR_INET_TIMER, MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) @@ -720,12 +736,14 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MAX }; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index a0b637911d3c..5f589c7a8382 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -151,6 +151,9 @@ #define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and * aggregation protocol */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ /* * This is an Ethernet frame header. 
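[Editorial aside: the new ETH_P_MCTP above is a link-layer protocol number, so ordinary AF_PACKET plumbing is enough to observe frames the stack has classified as MCTP, independent of the MCTP socket family whose uapi header appears further down in this diff. A hedged userspace sketch; open_mctp_tap() is an illustrative name, and whether a given link actually delivers such frames depends on the MCTP link drivers.]

#include <arpa/inet.h>
#include <linux/if_ether.h>	/* ETH_P_MCTP, new in this patch */
#include <linux/if_packet.h>
#include <net/if.h>
#include <sys/socket.h>

int open_mctp_tap(const char *ifname)
{
	struct sockaddr_ll sll = {
		.sll_family   = AF_PACKET,
		.sll_protocol = htons(ETH_P_MCTP),
		.sll_ifindex  = if_nametoindex(ifname),
	};
	int fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_MCTP));

	if (fd < 0)
		return -1;
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0)
		return -1;
	return fd;	/* recvfrom() now yields packets with skb->protocol == ETH_P_MCTP */
}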
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4882e81514b6..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; @@ -1260,4 +1261,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d1b327036ae4..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ @@ -211,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5ad396a57eb3..c4c53a9ab959 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h new file mode 100644 index 000000000000..ac4de376f0ce --- /dev/null +++ b/include/uapi/linux/ioam6.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_H +#define _UAPI_LINUX_IOAM6_H + +#include <asm/byteorder.h> +#include <linux/types.h> + +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + +/* + * IPv6 IOAM Option Header + */ +struct ioam6_hdr { + 
__u8 opt_type; + __u8 opt_len; + __u8 :8; /* reserved */ +#define IOAM6_TYPE_PREALLOC 0 + __u8 type; +} __attribute__((packed)); + +/* + * IOAM Trace Header + */ +struct ioam6_trace_hdr { + __be16 namespace_id; + +#if defined(__LITTLE_ENDIAN_BITFIELD) + + __u8 :1, /* unused */ + :1, /* unused */ + overflow:1, + nodelen:5; + + __u8 remlen:7, + :1; /* unused */ + + union { + __be32 type_be32; + + struct { + __u32 bit7:1, + bit6:1, + bit5:1, + bit4:1, + bit3:1, + bit2:1, + bit1:1, + bit0:1, + bit15:1, /* unused */ + bit14:1, /* unused */ + bit13:1, /* unused */ + bit12:1, /* unused */ + bit11:1, + bit10:1, + bit9:1, + bit8:1, + bit23:1, /* reserved */ + bit22:1, + bit21:1, /* unused */ + bit20:1, /* unused */ + bit19:1, /* unused */ + bit18:1, /* unused */ + bit17:1, /* unused */ + bit16:1, /* unused */ + :8; /* reserved */ + } type; + }; + +#elif defined(__BIG_ENDIAN_BITFIELD) + + __u8 nodelen:5, + overflow:1, + :1, /* unused */ + :1; /* unused */ + + __u8 :1, /* unused */ + remlen:7; + + union { + __be32 type_be32; + + struct { + __u32 bit0:1, + bit1:1, + bit2:1, + bit3:1, + bit4:1, + bit5:1, + bit6:1, + bit7:1, + bit8:1, + bit9:1, + bit10:1, + bit11:1, + bit12:1, /* unused */ + bit13:1, /* unused */ + bit14:1, /* unused */ + bit15:1, /* unused */ + bit16:1, /* unused */ + bit17:1, /* unused */ + bit18:1, /* unused */ + bit19:1, /* unused */ + bit20:1, /* unused */ + bit21:1, /* unused */ + bit22:1, + bit23:1, /* reserved */ + :8; /* reserved */ + } type; + }; + +#else +#error "Please fix <asm/byteorder.h>" +#endif + +#define IOAM6_TRACE_DATA_SIZE_MAX 244 + __u8 data[0]; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IOAM6_H */ diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h new file mode 100644 index 000000000000..ca4b22833754 --- /dev/null +++ b/include/uapi/linux/ioam6_genl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_GENL_H +#define _UAPI_LINUX_IOAM6_GENL_H + +#define IOAM6_GENL_NAME "IOAM6" +#define IOAM6_GENL_VERSION 0x1 + +enum { + IOAM6_ATTR_UNSPEC, + + IOAM6_ATTR_NS_ID, /* u16 */ + IOAM6_ATTR_NS_DATA, /* u32 */ + IOAM6_ATTR_NS_DATA_WIDE,/* u64 */ + +#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4) + IOAM6_ATTR_SC_ID, /* u32 */ + IOAM6_ATTR_SC_DATA, /* Binary */ + IOAM6_ATTR_SC_NONE, /* Flag */ + + IOAM6_ATTR_PAD, + + __IOAM6_ATTR_MAX, +}; + +#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1) + +enum { + IOAM6_CMD_UNSPEC, + + IOAM6_CMD_ADD_NAMESPACE, + IOAM6_CMD_DEL_NAMESPACE, + IOAM6_CMD_DUMP_NAMESPACES, + + IOAM6_CMD_ADD_SCHEMA, + IOAM6_CMD_DEL_SCHEMA, + IOAM6_CMD_DUMP_SCHEMAS, + + IOAM6_CMD_NS_SET_SCHEMA, + + __IOAM6_CMD_MAX, +}; + +#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_GENL_H */ diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..bae14636a8c8 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H +#define _UAPI_LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */ 
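[Editorial aside: the trace header defined above is what user space hands to the new IOAM6 lightweight tunnel via IOAM6_IPTUNNEL_TRACE. A hedged sketch of filling it follows; the rule that nodelen and remlen count 4-octet units, and that type bit0 (hop_limit + node id) consumes one such unit per hop, is assumed from the IOAM specification rather than stated in this diff, and alloc_trace() is an illustrative name.]

#include <arpa/inet.h>
#include <linux/ioam6.h>
#include <stdlib.h>

/* Build a pre-allocated trace template: namespace ns_id, only type
 * bit0 enabled, with three 4-octet words of free data space.
 */
struct ioam6_trace_hdr *alloc_trace(__u16 ns_id)
{
	size_t data = 12;	/* three 4-octet words */
	struct ioam6_trace_hdr *th = calloc(1, sizeof(*th) + data);

	if (!th)
		return NULL;
	th->namespace_id = htons(ns_id);
	th->type.bit0 = 1;	/* hop_limit + node id */
	th->nodelen = 1;	/* words inserted per IOAM node (assumed semantics) */
	th->remlen = data / 4;	/* free space left, in words (assumed semantics) */
	return th;
}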
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 70603775fe91..b243a53fa985 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -190,6 +190,9 @@ enum { DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, DEVCONF_RA_DEFRTR_METRIC, + DEVCONF_IOAM6_ENABLED, + DEVCONF_IOAM6_ID, + DEVCONF_IOAM6_ID_WIDE, DEVCONF_MAX }; diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 568a4303ccce..2e206919125c 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h new file mode 100644 index 000000000000..52b54d13f385 --- /dev/null +++ b/include/uapi/linux/mctp.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Management Component Transport Protocol (MCTP) + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __UAPI_MCTP_H +#define __UAPI_MCTP_H + +#include <linux/types.h> + +typedef __u8 mctp_eid_t; + +struct mctp_addr { + mctp_eid_t s_addr; +}; + +struct sockaddr_mctp { + unsigned short int smctp_family; + int smctp_network; + struct mctp_addr smctp_addr; + __u8 smctp_type; + __u8 smctp_tag; +}; + +#define MCTP_NET_ANY 0x0 + +#define MCTP_ADDR_NULL 0x00 +#define MCTP_ADDR_ANY 0xff + +#define MCTP_TAG_MASK 0x07 +#define MCTP_TAG_OWNER 0x08 + +#endif /* __UAPI_MCTP_H */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..150bcff49b1c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in + * per-cpu dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fef93d..6836ccb9c45d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. 
*/ diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index c525b3503797..af6ef2cfbf3d 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -17,6 +17,7 @@ #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 #define SKBMOD_F_SWAPMAC 0x8 +#define SKBMOD_F_ECN 0x10 struct tc_skbmod { tc_gen; diff --git a/init/main.c b/init/main.c index f5b8246e8aa1..11cbbec309fa 100644 --- a/init/main.c +++ b/init/main.c @@ -1221,7 +1221,7 @@ trace_initcall_start_cb(void *data, initcall_t fn) { ktime_t *calltime = (ktime_t *)data; - printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current)); + printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled()); *calltime = ktime_get(); } @@ -1235,8 +1235,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) rettime = ktime_get(); delta = ktime_sub(rettime, *calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, duration); + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n", + fn, ret, duration, irqs_disabled()); } static ktime_t initcall_calltime; diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index bd04f4a44c01..a82d6de86522 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -29,7 +29,7 @@ config BPF_SYSCALL select IRQ_WORK select TASKS_TRACE_RCU select BINARY_PRINTF - select NET_SOCK_MSG if INET + select NET_SOCK_MSG if NET default n help Enable the bpf() system call that allows to manipulate BPF programs diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3c4105603f9d..cebd4fb06d19 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key return 0; } +static void check_and_free_timer_in_array(struct bpf_array *arr, void *val) +{ + if (unlikely(map_value_has_timer(&arr->map))) + bpf_timer_cancel_and_free(val + arr->map.timer_off); +} + /* Called from syscall or from eBPF program */ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) @@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); + check_and_free_timer_in_array(array, val); } return 0; } @@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array) return (void *)round_down((unsigned long)array, PAGE_SIZE); } +static void array_map_free_timers(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + if (likely(!map_value_has_timer(map))) + return; + + for (i = 0; i < array->map.max_entries; i++) + bpf_timer_cancel_and_free(array->value + array->elem_size * i + + map->timer_off); +} + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void array_map_free(struct bpf_map *map) { @@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = { 
.map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, + .map_release_uref = array_map_free_timers, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 2d4fbdbb194e..2e9d47bb40ff 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog) return supported; } +const struct bpf_func_proto * +bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + const struct bpf_iter_target_info *tinfo; + const struct bpf_func_proto *fn = NULL; + + mutex_lock(&targets_mutex); + list_for_each_entry(tinfo, &targets, list) { + if (tinfo->btf_id == prog->aux->attach_btf_id) { + const struct bpf_iter_reg *reg_info; + + reg_info = tinfo->reg_info; + if (reg_info->get_func_proto) + fn = reg_info->get_func_proto(func_id, prog); + break; + } + } + mutex_unlock(&targets_mutex); + + return fn; +} + static void bpf_iter_link_release(struct bpf_link *link) { struct bpf_iter_link *iter_link = diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cb4b72997d9b..c395024610ed 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -/* find 'struct bpf_spin_lock' in map value. - * return >= 0 offset if found - * and < 0 in case of error - */ -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, + const char *name, int sz, int align) { const struct btf_member *member; u32 i, off = -ENOENT; - if (!__btf_type_is_struct(t)) - return -EINVAL; - for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); if (!__btf_type_is_struct(member_type)) continue; - if (member_type->size != sizeof(struct bpf_spin_lock)) + if (member_type->size != sz) continue; - if (strcmp(__btf_name_by_offset(btf, member_type->name_off), - "bpf_spin_lock")) + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) continue; if (off != -ENOENT) - /* only one 'struct bpf_spin_lock' is allowed */ + /* only one such field is allowed */ return -E2BIG; off = btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ return -EINVAL; off /= 8; - if (off % __alignof__(struct bpf_spin_lock)) - /* valid struct bpf_spin_lock will be 4 byte aligned */ + if (off % align) return -EINVAL; } return off; } +static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, + const char *name, int sz, int align) +{ + const struct btf_var_secinfo *vsi; + u32 i, off = -ENOENT; + + for_each_vsi(i, t, vsi) { + const struct btf_type *var = btf_type_by_id(btf, vsi->type); + const struct btf_type *var_type = btf_type_by_id(btf, var->type); + + if (!__btf_type_is_struct(var_type)) + continue; + if (var_type->size != sz) + continue; + if (vsi->size != sz) + continue; + if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) + continue; + if (off != -ENOENT) + /* only one such field is allowed */ + return -E2BIG; + off = vsi->offset; + if (off % align) + return -EINVAL; + } + return off; +} + +static int btf_find_field(const struct btf *btf, const struct btf_type *t, + const char *name, int sz, int align) +{ + + if (__btf_type_is_struct(t)) + 
return btf_find_struct_field(btf, t, name, sz, align); + else if (btf_type_is_datasec(t)) + return btf_find_datasec_var(btf, t, name, sz, align); + return -EINVAL; +} + +/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + return btf_find_field(btf, t, "bpf_spin_lock", + sizeof(struct bpf_spin_lock), + __alignof__(struct bpf_spin_lock)); +} + +int btf_find_timer(const struct btf *btf, const struct btf_type *t) +{ + return btf_find_field(btf, t, "bpf_timer", + sizeof(struct bpf_timer), + __alignof__(struct bpf_timer)); +} + static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) @@ -4776,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; if (ctx_arg_info->offset == off) { + if (!ctx_arg_info->btf_id) { + bpf_log(log,"invalid btf_id for context argument offset %u\n", off); + return false; + } + info->reg_type = ctx_arg_info->reg_type; info->btf = btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 480e936c54d0..585b2b77ccc4 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -16,6 +16,7 @@ * netstack, and assigning dedicated CPUs for this stage. This * basically allows for 10G wirespeed pre-filtering via bpf. */ +#include <linux/bitops.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/ptr_ring.h> @@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) } } +static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, + struct list_head *listp, + struct xdp_cpumap_stats *stats) +{ + struct sk_buff *skb, *tmp; + struct xdp_buff xdp; + u32 act; + int err; + + list_for_each_entry_safe(skb, tmp, listp, list) { + act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog); + switch (act) { + case XDP_PASS: + break; + case XDP_REDIRECT: + skb_list_del_init(skb); + err = xdp_do_generic_redirect(skb->dev, skb, &xdp, + rcpu->prog); + if (unlikely(err)) { + kfree_skb(skb); + stats->drop++; + } else { + stats->redirect++; + } + return; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(skb->dev, rcpu->prog, act); + fallthrough; + case XDP_DROP: + skb_list_del_init(skb); + kfree_skb(skb); + stats->drop++; + return; + } + } +} + static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) @@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, struct xdp_buff xdp; int i, nframes = 0; - if (!rcpu->prog) - return n; - - rcu_read_lock_bh(); - xdp_set_return_frame_no_direct(); xdp.rxq = &rxq; @@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, } } + xdp_clear_return_frame_no_direct(); + + return nframes; +} + +#define CPUMAP_BATCH 8 + +static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, + int xdp_n, struct xdp_cpumap_stats *stats, + struct list_head *list) +{ + int nframes; + + if (!rcpu->prog) + return xdp_n; + + rcu_read_lock_bh(); + + nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats); + if (stats->redirect) - xdp_do_flush_map(); + xdp_do_flush(); - xdp_clear_return_frame_no_direct(); + if (unlikely(!list_empty(list))) + cpu_map_bpf_prog_run_skb(rcpu, 
list, stats); rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ return nframes; } -#define CPUMAP_BATCH 8 static int cpu_map_kthread_run(void *data) { @@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data) struct xdp_cpumap_stats stats = {}; /* zero stats */ unsigned int kmem_alloc_drops = 0, sched = 0; gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; + int i, n, m, nframes, xdp_n; void *frames[CPUMAP_BATCH]; void *skbs[CPUMAP_BATCH]; - int i, n, m, nframes; LIST_HEAD(list); /* Release CPU reschedule checks */ @@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data) */ n = __ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); - for (i = 0; i < n; i++) { + for (i = 0, xdp_n = 0; i < n; i++) { void *f = frames[i]; - struct page *page = virt_to_page(f); + struct page *page; + + if (unlikely(__ptr_test_bit(0, &f))) { + struct sk_buff *skb = f; + + __ptr_clear_bit(0, &skb); + list_add_tail(&skb->list, &list); + continue; + } + + frames[xdp_n++] = f; + page = virt_to_page(f); /* Bring struct page memory area to curr CPU. Read by * build_skb_around via page_is_pfmemalloc(), and when @@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data) } /* Support running another XDP prog on this CPU */ - nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats); + nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list); if (nframes) { m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs); if (unlikely(m == 0)) { @@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data) return 0; } -bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return map->map_type == BPF_MAP_TYPE_CPUMAP && - map->value_size != offsetofend(struct bpf_cpumap_val, qsize); -} - static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) { struct bpf_prog *prog; @@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, return 0; } +int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb) +{ + int ret; + + __skb_pull(skb, skb->mac_len); + skb_set_redirected(skb, false); + __ptr_set_bit(0, &skb); + + ret = ptr_ring_produce(rcpu->queue, skb); + if (ret < 0) + goto trace; + + wake_up_process(rcpu->kthread); +trace: + trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu); + return ret; +} + void __cpu_map_flush(void) { struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fdc20892837c..542e94fa30b4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, return -ENOENT; } -bool dev_map_can_have_prog(struct bpf_map *map) -{ - if ((map->map_type == BPF_MAP_TYPE_DEVMAP || - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) && - map->value_size != offsetofend(struct bpf_devmap_val, ifindex)) - return true; - - return false; -} - static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, struct xdp_frame **frames, int n, struct net_device *dev) @@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, return 0; } +static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst) +{ + struct xdp_txq_info txq = { .dev = dst->dev }; + struct xdp_buff xdp; + u32 act; + + if (!dst->xdp_prog) + return XDP_PASS; + + __skb_pull(skb, skb->mac_len); + xdp.txq = &txq; + + act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog); + switch (act) { + case XDP_PASS: + __skb_push(skb, skb->mac_len); 
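/* XDP_PASS: restore the mac header pulled above so the caller transmits the complete frame */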
+ break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dst->dev, dst->xdp_prog, act); + fallthrough; + case XDP_DROP: + kfree_skb(skb); + break; + } + + return act; +} + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx) { @@ -615,6 +636,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; + + /* Redirect has already succeeded semantically at this point, so we just + * return 0 even if packet is dropped. Helper below takes care of + * freeing skb. + */ + if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS) + return 0; + skb->dev = dst->dev; generic_xdp_tx(skb, xdp_prog); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 72c58cc516a3..6dc3fae46a56 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -228,6 +228,32 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); } +static bool htab_has_extra_elems(struct bpf_htab *htab) +{ + return !htab_is_percpu(htab) && !htab_is_lru(htab); +} + +static void htab_free_prealloced_timers(struct bpf_htab *htab) +{ + u32 num_entries = htab->map.max_entries; + int i; + + if (likely(!map_value_has_timer(&htab->map))) + return; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + struct htab_elem *elem; + + elem = get_htab_elem(htab, i); + bpf_timer_cancel_and_free(elem->key + + round_up(htab->map.key_size, 8) + + htab->map.timer_off); + cond_resched(); + } +} + static void htab_free_elems(struct bpf_htab *htab) { int i; @@ -265,8 +291,12 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, struct htab_elem *l; if (node) { + u32 key_size = htab->map.key_size; + l = container_of(node, struct htab_elem, lru_node); - memcpy(l->key, key, htab->map.key_size); + memcpy(l->key, key, key_size); + check_and_init_map_value(&htab->map, + l->key + round_up(key_size, 8)); return l; } @@ -278,7 +308,7 @@ static int prealloc_init(struct bpf_htab *htab) u32 num_entries = htab->map.max_entries; int err = -ENOMEM, i; - if (!htab_is_percpu(htab) && !htab_is_lru(htab)) + if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, @@ -695,6 +725,14 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } +static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem) +{ + if (unlikely(map_value_has_timer(&htab->map))) + bpf_timer_cancel_and_free(elem->key + + round_up(htab->map.key_size, 8) + + htab->map.timer_off); +} + /* It is called from the bpf_lru_list when the LRU needs to delete * older elements from the htab. 
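 * Any timer embedded in the evicted element must be cancelled as well, which is why check_and_free_timer() is now called once the element is unlinked below.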
*/ @@ -719,6 +757,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { hlist_nulls_del_rcu(&l->hash_node); + check_and_free_timer(htab, l); break; } @@ -790,6 +829,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); + check_and_free_timer(htab, l); kfree(l); } @@ -817,6 +857,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) htab_put_fd_value(htab, l); if (htab_is_prealloc(htab)) { + check_and_free_timer(htab, l); __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); @@ -920,8 +961,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-ENOMEM); goto dec_count; } - check_and_init_map_lock(&htab->map, - l_new->key + round_up(key_size, 8)); + check_and_init_map_value(&htab->map, + l_new->key + round_up(key_size, 8)); } memcpy(l_new->key, key, key_size); @@ -1062,6 +1103,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_nulls_del_rcu(&l_old->hash_node); if (!htab_is_prealloc(htab)) free_htab_elem(htab, l_old); + else + check_and_free_timer(htab, l_old); } ret = 0; err: @@ -1069,6 +1112,12 @@ err: return ret; } +static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) +{ + check_and_free_timer(htab, elem); + bpf_lru_push_free(&htab->lru, &elem->lru_node); +} + static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { @@ -1102,7 +1151,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, l_new = prealloc_lru_pop(htab, key, hash); if (!l_new) return -ENOMEM; - memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); + copy_map_value(&htab->map, + l_new->key + round_up(map->key_size, 8), value); ret = htab_lock_bucket(htab, b, hash, &flags); if (ret) @@ -1128,9 +1178,9 @@ err: htab_unlock_bucket(htab, b, hash, flags); if (ret) - bpf_lru_push_free(&htab->lru, &l_new->lru_node); + htab_lru_push_free(htab, l_new); else if (l_old) - bpf_lru_push_free(&htab->lru, &l_old->lru_node); + htab_lru_push_free(htab, l_old); return ret; } @@ -1339,7 +1389,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) htab_unlock_bucket(htab, b, hash, flags); if (l) - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); return ret; } @@ -1359,6 +1409,35 @@ static void delete_all_elements(struct bpf_htab *htab) } } +static void htab_free_malloced_timers(struct bpf_htab *htab) +{ + int i; + + rcu_read_lock(); + for (i = 0; i < htab->n_buckets; i++) { + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; + struct htab_elem *l; + + hlist_nulls_for_each_entry(l, n, head, hash_node) + check_and_free_timer(htab, l); + cond_resched_rcu(); + } + rcu_read_unlock(); +} + +static void htab_map_free_timers(struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + if (likely(!map_value_has_timer(&htab->map))) + return; + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers(htab); + else + htab_free_prealloced_timers(htab); +} + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void htab_map_free(struct bpf_map *map) { @@ -1456,7 +1535,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void 
*key, else copy_map_value(map, value, l->key + roundup_key_size); - check_and_init_map_lock(map, value); + check_and_init_map_value(map, value); } hlist_nulls_del_rcu(&l->hash_node); @@ -1467,7 +1546,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, htab_unlock_bucket(htab, b, hash, bflags); if (is_lru_map && l) - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); return ret; } @@ -1645,7 +1724,7 @@ again_nocopy: true); else copy_map_value(map, dst_val, value); - check_and_init_map_lock(map, dst_val); + check_and_init_map_value(map, dst_val); } if (do_delete) { hlist_nulls_del_rcu(&l->hash_node); @@ -1672,7 +1751,7 @@ again_nocopy: while (node_to_free) { l = node_to_free; node_to_free = node_to_free->batch_flink; - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); } next_batch: @@ -2034,6 +2113,7 @@ const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, + .map_release_uref = htab_map_free_timers, .map_lookup_elem = htab_map_lookup_elem, .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, @@ -2055,6 +2135,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, + .map_release_uref = htab_map_free_timers, .map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 62cf00383910..15746f779fe1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -289,13 +289,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) static DEFINE_PER_CPU(unsigned long, irqsave_flags); -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) { unsigned long flags; local_irq_save(flags); __bpf_spin_lock(lock); __this_cpu_write(irqsave_flags, flags); +} + +notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +{ + __bpf_spin_lock_irqsave(lock); return 0; } @@ -306,13 +311,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = { .arg1_type = ARG_PTR_TO_SPIN_LOCK, }; -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) { unsigned long flags; flags = __this_cpu_read(irqsave_flags); __bpf_spin_unlock(lock); local_irq_restore(flags); +} + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + __bpf_spin_unlock_irqrestore(lock); return 0; } @@ -333,9 +343,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, else lock = dst + map->spin_lock_off; preempt_disable(); - ____bpf_spin_lock(lock); + __bpf_spin_lock_irqsave(lock); copy_map_value(map, dst, src); - ____bpf_spin_unlock(lock); + __bpf_spin_unlock_irqrestore(lock); preempt_enable(); } @@ -383,8 +393,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { }; #ifdef CONFIG_CGROUP_BPF -DECLARE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) { @@ -393,17 +401,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) * verifier checks that its value is correct. 
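 * (The storage pointer is now derived from the bpf_cg_run_ctx linked off current->bpf_ctx, replacing the old per-cpu bpf_cgroup_storage_info scan.)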
*/ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); - struct bpf_cgroup_storage *storage = NULL; + struct bpf_cgroup_storage *storage; + struct bpf_cg_run_ctx *ctx; void *ptr; - int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) - continue; - - storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); - break; - } + /* get current cgroup storage from BPF run context */ + ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + storage = ctx->prog_item->cgroup_storage[stype]; if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; @@ -989,6 +993,320 @@ const struct bpf_func_proto bpf_snprintf_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +/* BPF map elements can contain 'struct bpf_timer'. + * Such map owns all of its BPF timers. + * 'struct bpf_timer' is allocated as part of map element allocation + * and it's zero initialized. + * That space is used to keep 'struct bpf_timer_kern'. + * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and + * remembers 'struct bpf_map *' pointer it's part of. + * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn. + * bpf_timer_start() arms the timer. + * If user space reference to a map goes to zero at this point + * ops->map_release_uref callback is responsible for cancelling the timers, + * freeing their memory, and decrementing prog's refcnts. + * bpf_timer_cancel() cancels the timer and decrements prog's refcnt. + * Inner maps can contain bpf timers as well. ops->map_release_uref is + * freeing the timers when inner map is replaced or deleted by user space. + */ +struct bpf_hrtimer { + struct hrtimer timer; + struct bpf_map *map; + struct bpf_prog *prog; + void __rcu *callback_fn; + void *value; +}; + +/* the actual struct hidden inside uapi struct bpf_timer */ +struct bpf_timer_kern { + struct bpf_hrtimer *timer; + /* bpf_spin_lock is used here instead of spinlock_t to make + * sure that it always fits into space resereved by struct bpf_timer + * regardless of LOCKDEP and spinlock debug flags. + */ + struct bpf_spin_lock lock; +} __attribute__((aligned(8))); + +static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); + +static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) +{ + struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); + struct bpf_map *map = t->map; + void *value = t->value; + void *callback_fn; + void *key; + u32 idx; + + callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); + if (!callback_fn) + goto out; + + /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and + * cannot be preempted by another bpf_timer_cb() on the same cpu. + * Remember the timer this callback is servicing to prevent + * deadlock if callback_fn() calls bpf_timer_cancel() or + * bpf_map_delete_elem() on the same timer. + */ + this_cpu_write(hrtimer_running, t); + if (map->map_type == BPF_MAP_TYPE_ARRAY) { + struct bpf_array *array = container_of(map, struct bpf_array, map); + + /* compute the key */ + idx = ((char *)value - array->value) / array->elem_size; + key = &idx; + } else { /* hash or lru */ + key = value - round_up(map->key_size, 8); + } + + BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key, + (u64)(long)value, 0, 0); + /* The verifier checked that return value is zero. 
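For orientation, here is a minimal BPF-side sketch of the API this block implements, modeled on the selftests that accompany the series (assumes vmlinux.h and bpf_helpers.h; CLOCK_MONOTONIC may need a local define; names and the attach point are illustrative):

    struct map_elem {
        int counter;
        struct bpf_timer timer;    /* must be embedded in the map value */
    };

    struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 64);
        __type(key, int);
        __type(value, struct map_elem);
    } hmap SEC(".maps");

    /* async callback: receives (map, key, value) and must return 0 */
    static int timer_cb(void *map, int *key, struct map_elem *val)
    {
        val->counter++;
        return 0;
    }

    SEC("fentry/bpf_fentry_test1")
    int arm_timer(void *ctx)
    {
        int key = 0;
        struct map_elem *val = bpf_map_lookup_elem(&hmap, &key);

        if (!val)
            return 0;
        bpf_timer_init(&val->timer, &hmap, CLOCK_MONOTONIC);
        bpf_timer_set_callback(&val->timer, timer_cb);
        bpf_timer_start(&val->timer, 1000000 /* 1 ms */, 0);
        return 0;
    }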
*/ + + this_cpu_write(hrtimer_running, NULL); +out: + return HRTIMER_NORESTART; +} + +BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, + u64, flags) +{ + clockid_t clockid = flags & (MAX_CLOCKS - 1); + struct bpf_hrtimer *t; + int ret = 0; + + BUILD_BUG_ON(MAX_CLOCKS != 16); + BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); + BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); + + if (in_nmi()) + return -EOPNOTSUPP; + + if (flags >= MAX_CLOCKS || + /* similar to timerfd except _ALARM variants are not supported */ + (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME && + clockid != CLOCK_BOOTTIME)) + return -EINVAL; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (t) { + ret = -EBUSY; + goto out; + } + if (!atomic64_read(&map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. + */ + ret = -EPERM; + goto out; + } + /* allocate hrtimer via map_kmalloc to use memcg accounting */ + t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); + if (!t) { + ret = -ENOMEM; + goto out; + } + t->value = (void *)timer - map->timer_off; + t->map = map; + t->prog = NULL; + rcu_assign_pointer(t->callback_fn, NULL); + hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); + t->timer.function = bpf_timer_cb; + timer->timer = t; +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_init_proto = { + .func = bpf_timer_init, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, + struct bpf_prog_aux *, aux) +{ + struct bpf_prog *prev, *prog = aux->prog; + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (!t) { + ret = -EINVAL; + goto out; + } + if (!atomic64_read(&t->map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. Otherwise timer might still be + * running even when bpf prog is detached and user space + * is gone, since map_release_uref won't ever be called. + */ + ret = -EPERM; + goto out; + } + prev = t->prog; + if (prev != prog) { + /* Bump prog refcnt once. Every bpf_timer_set_callback() + * can pick different callback_fn-s within the same prog. 
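Both -EPERM paths above enforce the lifetime rule from the block comment: a map embedding timers must stay referenced by user space or be pinned in bpffs, otherwise map_release_uref would never run and an armed timer could outlive everything able to cancel it. The user-space side of that contract is a plain libbpf pin (the path here is made up):

    /* keeps map->usercnt above zero even after this process exits */
    err = bpf_map__pin(skel->maps.hmap, "/sys/fs/bpf/hmap");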
+ */ + prog = bpf_prog_inc_not_zero(prog); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto out; + } + if (prev) + /* Drop prev prog refcnt when swapping with new prog */ + bpf_prog_put(prev); + t->prog = prog; + } + rcu_assign_pointer(t->callback_fn, callback_fn); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_set_callback_proto = { + .func = bpf_timer_set_callback, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_PTR_TO_FUNC, +}; + +BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) +{ + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + if (flags) + return -EINVAL; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (!t || !t->prog) { + ret = -EINVAL; + goto out; + } + hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_start_proto = { + .func = bpf_timer_start, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static void drop_prog_refcnt(struct bpf_hrtimer *t) +{ + struct bpf_prog *prog = t->prog; + + if (prog) { + bpf_prog_put(prog); + t->prog = NULL; + rcu_assign_pointer(t->callback_fn, NULL); + } +} + +BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) +{ + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (!t) { + ret = -EINVAL; + goto out; + } + if (this_cpu_read(hrtimer_running) == t) { + /* If bpf callback_fn is trying to bpf_timer_cancel() + * its own timer the hrtimer_cancel() will deadlock + * since it waits for callback_fn to finish + */ + ret = -EDEADLK; + goto out; + } + drop_prog_refcnt(t); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + /* Cancel the timer and wait for associated callback to finish + * if it was running. + */ + ret = ret ?: hrtimer_cancel(&t->timer); + return ret; +} + +static const struct bpf_func_proto bpf_timer_cancel_proto = { + .func = bpf_timer_cancel, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, +}; + +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_timer_cancel_and_free(void *val) +{ + struct bpf_timer_kern *timer = val; + struct bpf_hrtimer *t; + + /* Performance optimization: read timer->timer without lock first. */ + if (!READ_ONCE(timer->timer)) + return; + + __bpf_spin_lock_irqsave(&timer->lock); + /* re-read it under lock */ + t = timer->timer; + if (!t) + goto out; + drop_prog_refcnt(t); + /* The subsequent bpf_timer_start/cancel() helpers won't be able to use + * this timer, since it won't be initialized. + */ + timer->timer = NULL; +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + if (!t) + return; + /* Cancel the timer and wait for callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call kfree(t) + * right after for both preallocated and non-preallocated maps. + * The timer->timer = NULL was already done and no code path can + * see address 't' anymore. + * + * Check that bpf_map_delete/update_elem() wasn't called from timer + * callback_fn. 
In such case don't call hrtimer_cancel() (since it will + * deadlock) and don't call hrtimer_try_to_cancel() (since it will just + * return -1). Though callback_fn is still running on this cpu it's + * safe to do kfree(t) because bpf_timer_cb() read everything it needed + * from 't'. The bpf subprog callback_fn won't be able to access 't', + * since timer->timer = NULL was already done. The timer will be + * effectively cancelled because bpf_timer_cb() will return + * HRTIMER_NORESTART. + */ + if (this_cpu_read(hrtimer_running) != t) + hrtimer_cancel(&t->timer); + kfree(t); +} + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -1055,6 +1373,14 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_timer_init: + return &bpf_timer_init_proto; + case BPF_FUNC_timer_set_callback: + return &bpf_timer_set_callback_proto; + case BPF_FUNC_timer_start: + return &bpf_timer_start_proto; + case BPF_FUNC_timer_cancel: + return &bpf_timer_cancel_proto; default: break; } diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index bd11db9774c3..035e9e3a7132 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -1,6 +1,7 @@ //SPDX-License-Identifier: GPL-2.0 #include <linux/bpf-cgroup.h> #include <linux/bpf.h> +#include <linux/bpf_local_storage.h> #include <linux/btf.h> #include <linux/bug.h> #include <linux/filter.h> @@ -11,9 +12,6 @@ #ifdef CONFIG_CGROUP_BPF -DEFINE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); - #include "../cgroup/cgroup-internal.h" #define LOCAL_STORAGE_CREATE_FLAG_MASK \ @@ -173,7 +171,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key, return -ENOMEM; memcpy(&new->data[0], value, map->value_size); - check_and_init_map_lock(map, new->data); + check_and_init_map_value(map, new->data); new = xchg(&storage->buf, new); kfree_rcu(new, rcu); @@ -286,9 +284,17 @@ enoent: static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { + __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE; int numa_node = bpf_map_attr_numa_node(attr); struct bpf_cgroup_storage_map *map; + /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percu + * is the same as other local storages. 
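The clamp added below is easiest to see with numbers; from memory PCPU_MIN_UNIT_SIZE is 32KB and BPF_LOCAL_STORAGE_MAX_VALUE_SIZE sits just under 64KB, so treat these values as illustrative:

    union bpf_attr attr = {
        .map_type   = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
        .key_size   = sizeof(struct bpf_cgroup_storage_key),
        .value_size = 48 * 1024,
    };
    /* percpu flavor: 48KB > PCPU_MIN_UNIT_SIZE              -> -E2BIG
     * plain flavor:  48KB < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE -> accepted,
     * where the old PAGE_SIZE cap would have returned -E2BIG */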
+ */ + if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + max_value_size = min_t(__u32, max_value_size, + PCPU_MIN_UNIT_SIZE); + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) && attr->key_size != sizeof(__u64)) return ERR_PTR(-EINVAL); @@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->value_size == 0) return ERR_PTR(-EINVAL); - if (attr->value_size > PAGE_SIZE) + if (attr->value_size > max_value_size) return ERR_PTR(-E2BIG); if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || @@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map, static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { - enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); + enum bpf_cgroup_storage_type stype; struct bpf_cgroup_storage *storage; int cpu; @@ -509,7 +515,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, map->numa_node); if (!storage->buf) goto enomem; - check_and_init_map_lock(map, storage->buf->data); + check_and_init_map_value(map, storage->buf->data); } else { storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp); if (!storage->percpu_buf) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 39ab0b68cade..5cd8f5277279 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -3,6 +3,7 @@ */ #include <linux/slab.h> #include <linux/bpf.h> +#include <linux/btf.h> #include "map_in_map.h" @@ -50,6 +51,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->spin_lock_off = inner_map->spin_lock_off; + inner_map_meta->timer_off = inner_map->timer_off; + if (inner_map->btf) { + btf_get(inner_map->btf); + inner_map_meta->btf = inner_map->btf; + } /* Misc members not needed in bpf_map_meta_equal() check. 
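Copying timer_off into the meta map (and, in the next hunk, comparing it in bpf_map_meta_equal()) matters because the verifier bakes the timer offset into loaded programs. Two hypothetical value layouts show what the check rejects:

    struct val_a { struct bpf_timer t; int x; };    /* timer_off == 0 */
    struct val_b { int x; struct bpf_timer t; };    /* timer_off == 8 */

An outer map whose inner-map template uses val_a will now refuse, at update time, an inner map whose value type is val_b, even though key and value sizes match.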
*/ inner_map_meta->ops = inner_map->ops; @@ -65,6 +71,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) void bpf_map_meta_free(struct bpf_map *map_meta) { + btf_put(map_meta->btf); kfree(map_meta); } @@ -75,6 +82,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, return meta0->map_type == meta1->map_type && meta0->key_size == meta1->key_size && meta0->value_size == meta1->value_size && + meta0->timer_off == meta1->timer_off && meta0->map_flags == meta1->map_flags; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e343f158e556..9a2068e39d23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -260,8 +260,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, value, ptr, true); else copy_map_value(map, value, ptr); - /* mask lock, since value wasn't zero inited */ - check_and_init_map_lock(map, value); + /* mask lock and timer, since value wasn't zero inited */ + check_and_init_map_value(map, value); } rcu_read_unlock(); } @@ -623,7 +623,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) struct bpf_map *map = filp->private_data; int err; - if (!map->ops->map_mmap || map_value_has_spin_lock(map)) + if (!map->ops->map_mmap || map_value_has_spin_lock(map) || + map_value_has_timer(map)) return -ENOTSUPP; if (!(vma->vm_flags & VM_SHARED)) @@ -793,6 +794,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, } } + map->timer_off = btf_find_timer(btf, value_type); + if (map_value_has_timer(map)) { + if (map->map_flags & BPF_F_RDONLY_PROG) + return -EACCES; + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -EOPNOTSUPP; + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type); @@ -844,6 +855,7 @@ static int map_create(union bpf_attr *attr) mutex_init(&map->freeze_mutex); map->spin_lock_off = -EINVAL; + map->timer_off = -EINVAL; if (attr->btf_key_type_id || attr->btf_value_type_id || /* Even the map's value is a kernel's struct, * the bpf_prog.o must have BTF to begin with @@ -1591,7 +1603,8 @@ static int map_freeze(const union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || + map_value_has_timer(map)) { fdput(f); return -ENOTSUPP; } @@ -1699,6 +1712,8 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) { + unsigned long flags; + /* cBPF to eBPF migrations are currently not in the idr store. 
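As I read the two syscall.c changes that follow: once timers can drop the last program reference from hrtimer context, bpf_prog_put() may run with IRQs disabled, so the idr lock must switch to irqsave and the heavyweight teardown (perf unload event, audit record) must be deferred to process context. The generic shape of that deferral, as a self-contained sketch:

    struct obj {
        refcount_t refcnt;
        struct work_struct work;
    };

    static void obj_free_deferred(struct work_struct *work)
    {
        struct obj *o = container_of(work, struct obj, work);

        kfree(o);    /* heavyweight teardown in process context */
    }

    static void obj_put(struct obj *o)
    {
        if (!refcount_dec_and_test(&o->refcnt))
            return;
        if (in_irq() || irqs_disabled()) {
            INIT_WORK(&o->work, obj_free_deferred);
            schedule_work(&o->work);
        } else {
            obj_free_deferred(&o->work);
        }
    }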
* Offloaded programs are removed from the store when their device * disappears - even if someone grabs an fd to them they are unusable, @@ -1708,7 +1723,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) return; if (do_idr_lock) - spin_lock_bh(&prog_idr_lock); + spin_lock_irqsave(&prog_idr_lock, flags); else __acquire(&prog_idr_lock); @@ -1716,7 +1731,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) prog->aux->id = 0; if (do_idr_lock) - spin_unlock_bh(&prog_idr_lock); + spin_unlock_irqrestore(&prog_idr_lock, flags); else __release(&prog_idr_lock); } @@ -1752,14 +1767,32 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) } } +static void bpf_prog_put_deferred(struct work_struct *work) +{ + struct bpf_prog_aux *aux; + struct bpf_prog *prog; + + aux = container_of(work, struct bpf_prog_aux, work); + prog = aux->prog; + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + __bpf_prog_put_noref(prog, true); +} + static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { - if (atomic64_dec_and_test(&prog->aux->refcnt)) { - perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); - bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + struct bpf_prog_aux *aux = prog->aux; + + if (atomic64_dec_and_test(&aux->refcnt)) { /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - __bpf_prog_put_noref(prog, true); + + if (in_irq() || irqs_disabled()) { + INIT_WORK(&aux->work, bpf_prog_put_deferred); + schedule_work(&aux->work); + } else { + bpf_prog_put_deferred(&aux->work); + } } } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 28a3630c48ee..b2535acfe9db 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -172,7 +172,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) } static struct bpf_tramp_progs * -bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) +bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg) { const struct bpf_prog_aux *aux; struct bpf_tramp_progs *tprogs; @@ -189,8 +189,10 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) *total += tr->progs_cnt[kind]; progs = tprogs[kind].progs; - hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) + hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { + *ip_arg |= aux->prog->call_get_func_ip; *progs++ = aux->prog; + } } return tprogs; } @@ -333,9 +335,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) struct bpf_tramp_image *im; struct bpf_tramp_progs *tprogs; u32 flags = BPF_TRAMP_F_RESTORE_REGS; + bool ip_arg = false; int err, total; - tprogs = bpf_trampoline_get_progs(tr, &total); + tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg); if (IS_ERR(tprogs)) return PTR_ERR(tprogs); @@ -357,6 +360,9 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + if (ip_arg) + flags |= BPF_TRAMP_F_IP_ARG; + err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, &tr->func.model, flags, tprogs, tr->func.addr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9bda5476ea5..5ea2238a6656 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -255,6 +255,7 @@ struct bpf_call_arg_meta { int mem_size; u64 msize_max_value; int ref_obj_id; + int map_uid; int func_id; struct btf *btf; u32 btf_id; @@ -734,6 +735,10 @@ 
static void print_verifier_state(struct bpf_verifier_env *env, if (state->refs[i].id) verbose(env, ",%d", state->refs[i].id); } + if (state->in_callback_fn) + verbose(env, " cb"); + if (state->in_async_callback_fn) + verbose(env, " async_cb"); verbose(env, "\n"); } @@ -1135,6 +1140,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) if (map->inner_map_meta) { reg->type = CONST_PTR_TO_MAP; reg->map_ptr = map->inner_map_meta; + /* transfer reg's id which is unique for every map_lookup_elem + * as UID of the inner map. + */ + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -1522,6 +1531,54 @@ static void init_func_state(struct bpf_verifier_env *env, init_reg_state(env, state); } +/* Similar to push_stack(), but for async callbacks */ +static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx, + int subprog) +{ + struct bpf_verifier_stack_elem *elem; + struct bpf_func_state *frame; + + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + if (!elem) + goto err; + + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; + elem->log_pos = env->log.len_used; + env->head = elem; + env->stack_size++; + if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { + verbose(env, + "The sequence of %d jumps is too complex for async cb.\n", + env->stack_size); + goto err; + } + /* Unlike push_stack() do not copy_verifier_state(). + * The caller state doesn't matter. + * This is async callback. It starts in a fresh stack. + * Initialize it similar to do_check_common(). + */ + elem->st.branches = 1; + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + goto err; + init_func_state(env, frame, + BPF_MAIN_FUNC /* callsite */, + 0 /* frameno within this callchain */, + subprog /* subprog number within this prog */); + elem->st.frame[0] = frame; + return &elem->st; +err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + /* pop all elements and return */ + while (!pop_stack(env, NULL, NULL, false)); + return NULL; +} + + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ @@ -3217,6 +3274,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } } + if (map_value_has_timer(map)) { + u32 t = map->timer_off; + + if (reg->smin_value + off < t + sizeof(struct bpf_timer) && + t < reg->umax_value + off + size) { + verbose(env, "bpf_timer cannot be accessed directly by load/store\n"); + return -EACCES; + } + } return err; } @@ -3619,6 +3685,8 @@ process_func: continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { + int next_insn; + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ @@ -3626,13 +3694,22 @@ continue_func: ret_prog[frame] = idx; /* find the callee */ - i = i + insn[i].imm + 1; - idx = find_subprog(env, i); + next_insn = i + insn[i].imm + 1; + idx = find_subprog(env, next_insn); if (idx < 0) { WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - i); + next_insn); return -EFAULT; } + if (subprog[idx].is_async_cb) { + if (subprog[idx].has_tail_call) { + verbose(env, "verifier bug. 
subprog has tail_call and async cb\n"); + return -EFAULT; + } + /* async callbacks don't increase bpf prog stack size */ + continue; + } + i = next_insn; if (subprog[idx].has_tail_call) tail_call_reachable = true; @@ -4634,6 +4711,54 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, return 0; } +static int process_timer_func(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + bool is_const = tnum_is_const(reg->var_off); + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (!is_const) { + verbose(env, + "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n", + map->name); + return -EINVAL; + } + if (!map_value_has_timer(map)) { + if (map->timer_off == -E2BIG) + verbose(env, + "map '%s' has more than one 'struct bpf_timer'\n", + map->name); + else if (map->timer_off == -ENOENT) + verbose(env, + "map '%s' doesn't have 'struct bpf_timer'\n", + map->name); + else + verbose(env, + "map '%s' is not a struct type or bpf_timer is mangled\n", + map->name); + return -EINVAL; + } + if (map->timer_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n", + val + reg->off, map->timer_off); + return -EINVAL; + } + if (meta->map_ptr) { + verbose(env, "verifier bug. Two map pointers in a timer helper\n"); + return -EFAULT; + } + meta->map_uid = reg->map_uid; + meta->map_ptr = map; + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return type == ARG_PTR_TO_MEM || @@ -4766,6 +4891,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4797,6 +4923,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, + [ARG_PTR_TO_TIMER] = &timer_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -4926,7 +5053,29 @@ skip_type_check: if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ + if (meta->map_ptr) { + /* Use map_uid (which is unique id of inner map) to reject: + * inner_map1 = bpf_map_lookup_elem(outer_map, key1) + * inner_map2 = bpf_map_lookup_elem(outer_map, key2) + * if (inner_map1 && inner_map2) { + * timer = bpf_map_lookup_elem(inner_map1); + * if (timer) + * // mismatch would have been allowed + * bpf_timer_init(timer, inner_map2); + * } + * + * Comparing map_ptr is enough to distinguish normal and outer maps. 
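The accepted counterpart of the pattern rejected in the comment above: the timer and the map pointer must come from the same outer-map lookup, which identical map_uid values prove. BPF-side sketch (map and key names invented):

    void *im = bpf_map_lookup_elem(&outer_map, &okey);

    if (im) {
        struct map_elem *e = bpf_map_lookup_elem(im, &ekey);

        if (e)    /* same lookup => same map_uid => allowed */
            bpf_timer_init(&e->timer, im, CLOCK_MONOTONIC);
    }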
+ */ + if (meta->map_ptr != reg->map_ptr || + meta->map_uid != reg->map_uid) { + verbose(env, + "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", + meta->map_uid, reg->map_uid); + return -EINVAL; + } + } meta->map_ptr = reg->map_ptr; + meta->map_uid = reg->map_uid; } else if (arg_type == ARG_PTR_TO_MAP_KEY) { /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within @@ -4978,6 +5127,9 @@ skip_type_check: verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_TIMER) { + if (process_timer_func(env, regno, meta)) + return -EACCES; } else if (arg_type == ARG_PTR_TO_FUNC) { meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { @@ -5593,6 +5745,31 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } + if (insn->code == (BPF_JMP | BPF_CALL) && + insn->imm == BPF_FUNC_timer_set_callback) { + struct bpf_verifier_state *async_cb; + + /* there is no real recursion here. timer callbacks are async */ + env->subprog_info[subprog].is_async_cb = true; + async_cb = push_async_cb(env, env->subprog_info[subprog].start, + *insn_idx, subprog); + if (!async_cb) + return -EFAULT; + callee = async_cb->frame[0]; + callee->async_entry_cnt = caller->async_entry_cnt + 1; + + /* Convert bpf_timer_set_callback() args into timer callback args */ + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err; + + clear_caller_saved_regs(env, caller->regs); + mark_reg_unknown(env, caller->regs, BPF_REG_0); + caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* continue with next insn after call */ + return 0; + } + callee = kzalloc(sizeof(*callee), GFP_KERNEL); if (!callee) return -ENOMEM; @@ -5720,6 +5897,35 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_timer_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); + * callback_fn(struct bpf_map *map, void *key, void *value); + */ + callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; + __mark_reg_known_zero(&callee->regs[BPF_REG_1]); + callee->regs[BPF_REG_1].map_ptr = map_ptr; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = map_ptr; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_async_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -5933,6 +6139,29 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, return err; } +static int check_get_func_ip(struct bpf_verifier_env *env) +{ + enum bpf_attach_type eatype = env->prog->expected_attach_type; + enum bpf_prog_type type = resolve_prog_type(env->prog); + int func_id = BPF_FUNC_get_func_ip; + + if (type == BPF_PROG_TYPE_TRACING) { + if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && + eatype != BPF_MODIFY_RETURN) { + verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", + 
func_id_name(func_id), func_id); + return -ENOTSUPP; + } + return 0; + } else if (type == BPF_PROG_TYPE_KPROBE) { + return 0; + } + + verbose(env, "func %s#%d not supported for program type %d\n", + func_id_name(func_id), func_id, type); + return -ENOTSUPP; +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -6047,6 +6276,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (func_id == BPF_FUNC_timer_set_callback) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_timer_callback_state); + if (err < 0) + return -EINVAL; + } + if (func_id == BPF_FUNC_snprintf) { err = check_bpf_snprintf_call(env, regs); if (err < 0) @@ -6082,6 +6318,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].map_uid = meta.map_uid; if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; if (map_value_has_spin_lock(meta.map_ptr)) @@ -6203,6 +6440,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) env->prog->call_get_stack = true; + if (func_id == BPF_FUNC_get_func_ip) { + if (check_get_func_ip(env)) + return -ENOTSUPP; + env->prog->call_get_func_ip = true; + } + if (changes_data) clear_all_pkt_pointers(env); return 0; @@ -9083,7 +9326,8 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; - const bool is_subprog = env->cur_state->frame[0]->subprogno; + struct bpf_func_state *frame = env->cur_state->frame[0]; + const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ if (!is_subprog && @@ -9108,6 +9352,22 @@ static int check_return_code(struct bpf_verifier_env *env) } reg = cur_regs(env) + BPF_REG_0; + + if (frame->in_async_callback_fn) { + /* enforce return zero from async callbacks like timer */ + if (reg->type != SCALAR_VALUE) { + verbose(env, "In async callback the register R0 is not a known value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + + if (!tnum_in(tnum_const(0), reg->var_off)) { + verbose_invalid_scalar(env, reg, &range, "async callback", "R0"); + return -EINVAL; + } + return 0; + } + if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", @@ -9322,8 +9582,12 @@ static int visit_func_call_insn(int t, int insn_cnt, init_explored_state(env, t + 1); if (visit_callee) { init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, + /* It's ok to allow recursion from CFG point of + * view. __check_func_call() will do the actual + * check. + */ + bpf_pseudo_func(insns + t)); } return ret; } @@ -9351,6 +9615,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env) return DONE_EXPLORING; case BPF_CALL: + if (insns[t].imm == BPF_FUNC_timer_set_callback) + /* Mark this call insn to trigger is_state_visited() check + * before call itself is processed by __check_func_call(). + * Otherwise new async state will be pushed for further + * exploration. 
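From the program side, the helper gated by check_get_func_ip() above looks like this (kprobe flavor; the attach target is illustrative). For BPF_PROG_TYPE_TRACING the call is later inlined into a single ctx[-1] load by the do_misc_fixups() hunk further down:

    SEC("kprobe/do_sys_openat2")
    int on_open(struct pt_regs *ctx)
    {
        u64 ip = bpf_get_func_ip(ctx);    /* address of the probed function */

        bpf_printk("entered %llx", ip);
        return 0;
    }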
+ */ + init_explored_state(env, t); return visit_func_call_insn(t, insn_cnt, insns, env, insns[t].src_reg == BPF_PSEUDO_CALL); @@ -10359,9 +10630,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) states_cnt++; if (sl->state.insn_idx != insn_idx) goto next; + if (sl->state.branches) { - if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur)) { + struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; + + if (frame->in_async_callback_fn && + frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) { + /* Different async_entry_cnt means that the verifier is + * processing another entry into async callback. + * Seeing the same state is not an indication of infinite + * loop or infinite recursion. + * But finding the same state doesn't mean that it's safe + * to stop processing the current state. The previous state + * hasn't yet reached bpf_exit, since state.branches > 0. + * Checking in_async_callback_fn alone is not enough either. + * Since the verifier still needs to catch infinite loops + * inside async callbacks. + */ + } else if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur)) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); return -EINVAL; @@ -11410,10 +11697,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying * [0, off) and [off, end) to new locations, so the patched range stays zero */ -static int adjust_insn_aux_data(struct bpf_verifier_env *env, - struct bpf_prog *new_prog, u32 off, u32 cnt) +static void adjust_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *new_data, + struct bpf_prog *new_prog, u32 off, u32 cnt) { - struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + struct bpf_insn_aux_data *old_data = env->insn_aux_data; struct bpf_insn *insn = new_prog->insnsi; u32 old_seen = old_data[off].seen; u32 prog_len; @@ -11426,12 +11714,9 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); if (cnt == 1) - return 0; + return; prog_len = new_prog->len; - new_data = vzalloc(array_size(prog_len, - sizeof(struct bpf_insn_aux_data))); - if (!new_data) - return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); @@ -11442,7 +11727,6 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, } env->insn_aux_data = new_data; vfree(old_data); - return 0; } static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) @@ -11477,6 +11761,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of const struct bpf_insn *patch, u32 len) { struct bpf_prog *new_prog; + struct bpf_insn_aux_data *new_data = NULL; + + if (len > 1) { + new_data = vzalloc(array_size(env->prog->len + len - 1, + sizeof(struct bpf_insn_aux_data))); + if (!new_data) + return NULL; + } new_prog = bpf_patch_insn_single(env->prog, off, patch, len); if (IS_ERR(new_prog)) { @@ -11484,10 +11776,10 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of verbose(env, "insn %d cannot be patched due to 16-bit range\n", env->insn_aux_data[off].orig_idx); + vfree(new_data); return NULL; } - if (adjust_insn_aux_data(env, new_prog, off, len)) - return 
NULL; + adjust_insn_aux_data(env, new_data, new_prog, off, len); adjust_subprog_starts(env, off, len); adjust_poke_descs(new_prog, off, len); return new_prog; @@ -12337,6 +12629,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); + enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; @@ -12554,6 +12847,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; } + if (insn->imm == BPF_FUNC_timer_set_callback) { + /* The verifier will process callback_fn as many times as necessary + * with different maps and the register states prepared by + * set_timer_callback_state will be accurate. + * + * The following use case is valid: + * map1 is shared by prog1, prog2, prog3. + * prog1 calls bpf_timer_init for some map1 elements + * prog2 calls bpf_timer_set_callback for some map1 elements. + * Those that were not bpf_timer_init-ed will return -EINVAL. + * prog3 calls bpf_timer_start for some map1 elements. + * Those that were not both bpf_timer_init-ed and + * bpf_timer_set_callback-ed will return -EINVAL. + */ + struct bpf_insn ld_addrs[2] = { + BPF_LD_IMM64(BPF_REG_3, (long)prog->aux), + }; + + insn_buf[0] = ld_addrs[0]; + insn_buf[1] = ld_addrs[1]; + insn_buf[2] = *insn; + cnt = 3; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto patch_call_imm; + } + /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * and other inlining handlers are currently limited to 64 bit * only. @@ -12670,6 +12996,21 @@ patch_map_ops_generic: continue; } + /* Implement bpf_get_func_ip inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ip) { + /* Load IP address from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) + return -ENOMEM; + + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/kernel/fork.c b/kernel/fork.c index bc94b2cc5995..e8b41e212110 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2083,6 +2083,7 @@ static __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); + p->bpf_ctx = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b4916ef388ad..c5e0b6a64091 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -948,6 +948,33 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) +{ + /* This helper call is inlined by verifier. */ + return ((u64 *)ctx)[-1]; +} + +static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { + .func = bpf_get_func_ip_tracing, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) +{ + struct kprobe *kp = kprobe_running(); + + return kp ? 
(uintptr_t)kp->addr : 0; +} + +static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { + .func = bpf_get_func_ip_kprobe, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1058,8 +1085,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_for_each_map_elem_proto; case BPF_FUNC_snprintf: return &bpf_snprintf_proto; + case BPF_FUNC_get_func_ip: + return &bpf_get_func_ip_proto_tracing; default: - return NULL; + return bpf_base_func_proto(func_id); } } @@ -1077,6 +1106,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_override_return: return &bpf_override_return_proto; #endif + case BPF_FUNC_get_func_ip: + return &bpf_get_func_ip_proto_kprobe; default: return bpf_tracing_func_proto(func_id, prog); } @@ -1430,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) const struct bpf_func_proto * tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *fn; + switch (func_id) { #ifdef CONFIG_NET case BPF_FUNC_skb_output: @@ -1470,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_d_path: return &bpf_d_path_proto; default: - return raw_tp_prog_func_proto(func_id, prog); + fn = raw_tp_prog_func_proto(func_id, prog); + if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) + fn = bpf_iter_get_func_proto(func_id, prog); + return fn; } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index d500320778c7..f6d5d30d01bf 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4286,8 +4286,8 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_LD_IMM64(R0, 0), BPF_LD_IMM64(R1, 0xffffffffffffffffLL), - BPF_STX_MEM(BPF_W, R10, R1, -40), - BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, @@ -6659,7 +6659,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) u64 duration; u32 ret; - if (test->test[i].data_size == 0 && + /* + * NOTE: Several sub-tests may be present, in which case + * a zero {data_size, result} tuple indicates the end of + * the sub-test array. The first test is always run, + * even if both data_size and result happen to be zero. + */ + if (i > 0 && + test->test[i].data_size == 0 && test->test[i].result == 0) break; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index eb8e87c4833f..8ef06f9e0db1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void) return false; /* Memcg to charge can't be determined. 
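On the memcontrol.c change that continues just below: my reading is that in_interrupt() is also true while a task merely has softirqs disabled (softirq_count() includes the BH-disable offset), so those task-context allocations wrongly escaped memcg accounting, whereas !in_task() only covers hardirq, serviced-softirq and NMI context:

    local_bh_disable();
    /* here in_interrupt() is true (BH-disable counts toward it),
     * but in_task() is also still true: we never left task context,
     * so with the new check the allocation is still charged */
    local_bh_enable();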
*/ - if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD)) + if (!in_task() || !current->mm || (current->flags & PF_KTHREAD)) return true; return false; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4cdf8416869d..55275ef9a31a 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, return 0; size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; - array = kzalloc(size, GFP_KERNEL); + array = kzalloc(size, GFP_KERNEL_ACCOUNT); if (array == NULL) return -ENOBUFS; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a0367b37512d..0c21d1fec852 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -372,8 +372,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: case SIOCGHWTSTAMP: - if (netif_device_present(real_dev) && ops->ndo_do_ioctl) - err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) + err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); break; } @@ -814,7 +814,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_eth_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, .ndo_get_stats64 = vlan_dev_get_stats64, #if IS_ENABLED(CONFIG_FCOE) diff --git a/net/Kconfig b/net/Kconfig index c7392c449b25..fb13460c6dab 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -363,6 +363,7 @@ source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" source "net/strparser/Kconfig" +source "net/mctp/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9ca9572188fe..fbfeb8a0bb37 100644 --- a/net/Makefile +++ b/net/Makefile @@ -78,3 +78,4 @@ obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ +obj-$(CONFIG_MCTP) += mctp/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 8ade5a4ceaf5..bf5736c1d458 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg) struct rtentry rtdef; int add_route; - if (copy_from_user(&atreq, arg, sizeof(atreq))) + if (get_user_ifreq(&atreq, NULL, arg)) return -EFAULT; dev = __dev_get_by_name(&init_net, atreq.ifr_name); @@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg) return 0; } - return copy_to_user(arg, &atreq, sizeof(atreq)) ? 
-EFAULT : 0; + return put_user_ifreq(&atreq, arg); } static int atrtr_ioctl_addrt(struct rtentry *rt) diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index e4f63dd43cb5..36249776c021 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) skb_pull(skb, AX25_KISS_HEADER_LEN); if (digipeat != NULL) { - if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) { - kfree_skb(skb); + if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) goto put; - } skb = ourskb; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index f53751ba81b3..22f2f66c6e0a 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25) void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) { - struct sk_buff *skbn; unsigned char *ptr; int headroom; @@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) headroom = ax25_addr_size(ax25->digipeat); - if (skb_headroom(skb) < headroom) { - if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) { + if (unlikely(skb_headroom(skb) < headroom)) { + skb = skb_expand_head(skb, headroom); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n"); - kfree_skb(skb); return; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - skb = skbn; } ptr = skb_push(skb, headroom); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b40e0bce67ea..d0b2e094bd55 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -441,24 +441,17 @@ put: struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { - struct sk_buff *skbn; unsigned char *bp; int len; len = digi->ndigi * AX25_ADDR_LEN; - if (skb_headroom(skb) < len) { - if ((skbn = skb_realloc_headroom(skb, len)) == NULL) { + if (unlikely(skb_headroom(skb) < len)) { + skb = skb_expand_head(skb, len); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n"); return NULL; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - - skb = skbn; } bp = skb_push(skb, len); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 63d42dcc9324..2b639c8b0ded 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2274,8 +2274,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -2446,8 +2445,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8c95a11a830a..7976a0435662 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -984,8 +984,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 007f2827935d..36a98d3cefe0 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -557,8 +557,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } 
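All the batman-adv hunks around here drop the NULL guard before dev_put(). That is only safe because dev_hold()/dev_put() became NULL-tolerant in this same cycle; from memory the helper now looks roughly like this, so treat the exact shape as an assumption:

    static inline void dev_put(struct net_device *dev)
    {
        if (dev) {    /* NULL is now a no-op */
    #ifdef CONFIG_PCPU_DEV_REFCNT
            this_cpu_dec(*dev->pcpu_refcnt);
    #else
            refcount_dec(&dev->dev_refcnt);
    #endif
        }
    }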
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 923e2197c2db..0158f267c403 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) upper = netdev_master_upper_dev_get_rcu(upper); } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); - if (upper) - dev_hold(upper); + dev_hold(upper); rcu_read_unlock(); return upper; @@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev, } out: - if (bridge) - dev_put(bridge); + dev_put(bridge); return ret4 + ret6; } @@ -2239,8 +2237,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, } out: - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); if (!ret && primary_if) *primary_if = hard_iface; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index da7249448474..6a4d3f437e00 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -799,12 +799,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -1412,12 +1410,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 434b4f042909..711fe5a2cec4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -820,8 +820,7 @@ check_roaming: out: if (in_hardif) batadv_hardif_put(in_hardif); - if (in_dev) - dev_put(in_dev); + dev_put(in_dev); if (tt_local) batadv_tt_local_entry_put(tt_local); if (tt_global) @@ -1217,8 +1216,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -2005,8 +2003,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1cc75c811e24..695449088e42 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -15,6 +15,7 @@ #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> +#include <net/xdp.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -87,17 +88,19 @@ reset: static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; + struct bpf_prog_array_item item = {.prog = prog}; + struct bpf_run_ctx *old_ctx; + struct bpf_cg_run_ctx run_ctx; struct bpf_test_timer t = { NO_MIGRATE }; enum bpf_cgroup_storage_type stype; int ret; for_each_cgroup_storage_type(stype) { - storage[stype] = bpf_cgroup_storage_alloc(prog, stype); - if (IS_ERR(storage[stype])) { - storage[stype] = NULL; + item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); + 
if (IS_ERR(item.cgroup_storage[stype])) { + item.cgroup_storage[stype] = NULL; for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return -ENOMEM; } } @@ -106,22 +109,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, repeat = 1; bpf_test_timer_enter(&t); + old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { - ret = bpf_cgroup_storage_set(storage); - if (ret) - break; - + run_ctx.prog_item = &item; if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = BPF_PROG_RUN(prog, ctx); - - bpf_cgroup_storage_unset(); } while (bpf_test_timer_continue(&t, repeat, &ret, time)); + bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return ret; } @@ -687,6 +687,64 @@ out: return ret; } +static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) +{ + unsigned int ingress_ifindex, rx_queue_index; + struct netdev_rx_queue *rxqueue; + struct net_device *device; + + if (!xdp_md) + return 0; + + if (xdp_md->egress_ifindex != 0) + return -EINVAL; + + ingress_ifindex = xdp_md->ingress_ifindex; + rx_queue_index = xdp_md->rx_queue_index; + + if (!ingress_ifindex && rx_queue_index) + return -EINVAL; + + if (ingress_ifindex) { + device = dev_get_by_index(current->nsproxy->net_ns, + ingress_ifindex); + if (!device) + return -ENODEV; + + if (rx_queue_index >= device->real_num_rx_queues) + goto free_dev; + + rxqueue = __netif_get_rx_queue(device, rx_queue_index); + + if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) + goto free_dev; + + xdp->rxq = &rxqueue->xdp_rxq; + /* The device is now tracked in the xdp->rxq for later + * dev_put() + */ + } + + xdp->data = xdp->data_meta + xdp_md->data; + return 0; + +free_dev: + dev_put(device); + return -EINVAL; +} + +static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) +{ + if (!xdp_md) + return; + + xdp_md->data = xdp->data - xdp->data_meta; + xdp_md->data_end = xdp->data_end - xdp->data_meta; + + if (xdp_md->ingress_ifindex) + dev_put(xdp->rxq->dev); +} + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { @@ -697,38 +755,74 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, struct netdev_rx_queue *rxqueue; struct xdp_buff xdp = {}; u32 retval, duration; + struct xdp_md *ctx; u32 max_data_sz; void *data; - int ret; + int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (ctx) { + /* There can't be user provided data before the meta data */ + if (ctx->data_meta || ctx->data_end != size || + ctx->data > ctx->data_end || + unlikely(xdp_metalen_invalid(ctx->data))) + goto free_ctx; + /* Meta data is allocated from the headroom */ + headroom -= ctx->data; + } /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); - if (IS_ERR(data)) - return PTR_ERR(data); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto free_ctx; + } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, 
&rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + ret = xdp_convert_md_to_buff(ctx, &xdp); + if (ret) + goto free_data; + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); + /* We convert the xdp_buff back to an xdp_md before checking the return + * code so the reference count of any held netdevice will be decremented + * even if the test run failed. + */ + xdp_convert_buff_to_md(&xdp, ctx); if (ret) goto out; - if (xdp.data != data + headroom || xdp.data_end != xdp.data + size) - size = xdp.data_end - xdp.data; - ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); + + if (xdp.data_meta != data + headroom || + xdp.data_end != xdp.data_meta + size) + size = xdp.data_end - xdp.data_meta; + + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, + duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct xdp_md)); + out: bpf_prog_change_xdp(prog, NULL); +free_data: kfree(data); +free_ctx: + kfree(ctx); return ret; } diff --git a/net/bridge/br.c b/net/bridge/br.c index bbab9984f24e..c8ae823aa8e7 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -202,6 +202,48 @@ static struct notifier_block br_switchdev_notifier = { .notifier_call = br_switchdev_event, }; +/* called under rtnl_mutex */ +static int br_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_brport_info *brport_info; + const struct switchdev_brport *b; + struct net_bridge_port *p; + int err = NOTIFY_DONE; + + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + switch (event) { + case SWITCHDEV_BRPORT_OFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + err = br_switchdev_port_offload(p, b->dev, b->ctx, + b->atomic_nb, b->blocking_nb, + b->tx_fwd_offload, extack); + err = notifier_from_errno(err); + break; + case SWITCHDEV_BRPORT_UNOFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb, + b->blocking_nb); + break; + } + +out: + return err; +} + +static struct notifier_block br_switchdev_blocking_notifier = { + .notifier_call = br_switchdev_blocking_event, +}; + /* br_boolopt_toggle - change user-controlled boolean option * * @br: bridge device @@ -215,17 +257,22 @@ static struct notifier_block br_switchdev_notifier = { int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, struct netlink_ext_ack *extack) { + int err = 0; + switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: br_opt_toggle(br, BROPT_NO_LL_LEARN, on); break; + case BR_BOOLOPT_MCAST_VLAN_SNOOPING: + err = br_multicast_toggle_vlan_snooping(br, on, extack); + break; default: /* shouldn't be called with unsupported options */ WARN_ON(1); break; } - return 0; + return err; } int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) @@ -233,6 +280,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: return br_opt_get(br, BROPT_NO_LL_LEARN); + case BR_BOOLOPT_MCAST_VLAN_SNOOPING: + return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); default: /* shouldn't be called with unsupported options */ WARN_ON(1); @@ -349,11 +398,15 @@ static int __init br_init(void) if (err) goto err_out4; - err = br_netlink_init(); + err = 
register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); if (err) goto err_out5; - brioctl_set(br_ioctl_deviceless_stub); + err = br_netlink_init(); + if (err) + goto err_out6; + + brioctl_set(br_ioctl_stub); #if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = br_fdb_test_addr; @@ -367,6 +420,8 @@ static int __init br_init(void) return 0; +err_out6: + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); err_out5: unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: @@ -386,6 +441,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e8b626cc6bfd..8d6bab244c4a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops); /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { + struct net_bridge_mcast_port *pmctx_null = NULL; struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mcast *brmctx = &br->multicast_ctx; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; const struct nf_br_ops *nf_ops; u8 state = BR_STATE_FORWARDING; + struct net_bridge_vlan *vlan; const unsigned char *dest; u16 vid = 0; @@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state)) + if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, + &state, &vlan)) goto out; if (IS_ENABLED(CONFIG_INET) && @@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood(br, skb, BR_PKT_MULTICAST, false, true); goto out; } - if (br_multicast_rcv(br, NULL, skb, vid)) { + if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) { kfree_skb(skb); goto out; } - mdst = br_mdb_get(br, skb, vid); + mdst = br_mdb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb), mdst)) - br_multicast_flood(mdst, skb, false, true); + br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) + br_multicast_flood(mdst, skb, brmctx, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { @@ -450,7 +454,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_set_rx_mode = br_dev_set_multicast_list, .ndo_change_rx_flags = br_dev_change_rx_flags, .ndo_change_mtu = br_change_mtu, - .ndo_do_ioctl = br_dev_ioctl, + .ndo_siocdevprivate = br_dev_siocdevprivate, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = br_netpoll_setup, .ndo_netpoll_cleanup = br_netpoll_cleanup, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 835cec1e5a03..ddd09f5994a7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -732,11 +732,11 @@ static inline size_t fdb_nlmsg_size(void) + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */ } -static int br_fdb_replay_one(struct notifier_block *nb, +static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, const struct net_bridge_fdb_entry *fdb, - struct net_device *dev, unsigned long action, - const void *ctx) + unsigned long action, const void *ctx) 
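BR_BOOLOPT_MCAST_VLAN_SNOOPING is the first boolopt whose toggle can fail, which is why br_boolopt_toggle() now returns an error instead of an unconditional 0. From userspace the option maps to a bridge link attribute; with a new-enough iproute2 this looks like (illustrative):

    ip link set dev br0 type bridge mcast_vlan_snooping 1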
{ + const struct net_bridge_port *p = READ_ONCE(fdb->dst); struct switchdev_notifier_fdb_info item; int err; @@ -745,25 +745,25 @@ static int br_fdb_replay_one(struct notifier_block *nb, item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); - item.info.dev = dev; + item.info.dev = (!p || item.is_local) ? br->dev : p->dev; item.info.ctx = ctx; err = nb->notifier_call(nb, action, &item); return notifier_to_errno(err); } -int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb) +int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding, + struct notifier_block *nb) { struct net_bridge_fdb_entry *fdb; struct net_bridge *br; unsigned long action; int err = 0; - if (!netif_is_bridge_master(br_dev)) - return -EINVAL; + if (!nb) + return 0; - if (!netif_is_bridge_port(dev) && !netif_is_bridge_master(dev)) + if (!netif_is_bridge_master(br_dev)) return -EINVAL; br = netdev_priv(br_dev); @@ -776,14 +776,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, rcu_read_lock(); hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) { - const struct net_bridge_port *dst = READ_ONCE(fdb->dst); - struct net_device *dst_dev; - - dst_dev = dst ? dst->dev : br->dev; - if (dst_dev && dst_dev != dev) - continue; - - err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); + err = br_fdb_replay_one(br, nb, fdb, action, ctx); if (err) break; } @@ -792,7 +785,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(br_fdb_replay); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 07856362538f..ec646656dbf1 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb skb_set_network_header(skb, depth); } + br_switchdev_frame_set_offload_fwd_mark(skb); + dev_queue_xmit(skb); return 0; @@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to, struct net *net; int br_hook; + /* Mark the skb for forwarding offload early so that br_handle_vlan() + * can know whether to pop the VLAN header on egress or keep it. 
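Since br_fdb_replay() no longer takes a target dev, it walks the whole FDB once and derives each event's device per entry: the bridge itself for local/host entries, otherwise the destination port. A sketch of the atomic notifier a switchdev driver might hand to it (foo_* names hypothetical):

    static int foo_fdb_event(struct notifier_block *nb,
                             unsigned long event, void *ptr)
    {
            struct switchdev_notifier_fdb_info *fdb_info = ptr;

            switch (event) {
            case SWITCHDEV_FDB_ADD_TO_DEVICE:
            case SWITCHDEV_FDB_DEL_TO_DEVICE:
                    /* info.dev is br->dev for local entries now, so
                     * filter on fdb_info->info.ctx rather than on the
                     * device, and defer the hardware update to a work
                     * item as usual. */
                    foo_schedule_fdb_work(event, fdb_info);
                    break;
            }
            return NOTIFY_DONE;
    }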
+ */ + nbp_switchdev_frame_mark_tx_fwd_offload(to, skb); + vg = nbp_vlan_group_rcu(to); skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) @@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver( if (!should_deliver(p, skb)) return prev; + nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb); + if (!prev) goto out; @@ -267,20 +276,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, /* called with rcu_read_lock */ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { - struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; - struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; bool allow_mode_include = true; struct hlist_node *rp; - rp = br_multicast_get_first_rport_node(br, skb); + rp = br_multicast_get_first_rport_node(brmctx, skb); if (mdst) { p = rcu_dereference(mdst->ports); - if (br_multicast_should_handle_mode(br, mdst->addr.proto) && + if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) && br_multicast_is_star_g(&mdst->addr)) allow_mode_include = false; } else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e4a32354a13..67c60240b713 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); dev->rtnl_link_ops = &br_link_ops; - res = register_netdev(dev); + res = register_netdevice(dev); if (res) free_netdev(dev); return res; @@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name) struct net_device *dev; int ret = 0; - rtnl_lock(); dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name) else br_dev_delete(dev, NULL); - rtnl_unlock(); return ret; } @@ -643,10 +641,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, if (err) goto err5; - err = nbp_switchdev_mark_set(p); - if (err) - goto err6; - dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -684,13 +678,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, */ err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack); if (err) - goto err7; + goto err6; } err = nbp_vlan_init(p, extack); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err7; + goto err6; } spin_lock_bh(&br->lock); @@ -713,13 +707,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, return 0; -err7: +err6: if (fdb_synced) br_fdb_unsync_static(br, p); list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); -err6: netdev_upper_dev_unlink(dev, br->dev); err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1f506309efa8..8a0c0cc55cb4 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_port *p = br_port_get_rcu(skb->dev); enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; + struct net_bridge_mcast_port *pmctx; struct net_bridge_mdb_entry *mdst; bool local_rcv, mcast_hit = false; + struct net_bridge_mcast *brmctx; + struct net_bridge_vlan *vlan; struct net_bridge *br; u16 vid = 0; u8 state; @@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!p || 
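Together these two markings implement tx forwarding offload: the skb is tagged before br_handle_vlan() so egress VLAN handling stays consistent, and maybe_deliver() records each egress port's hardware domain so the ASIC replicates the packet once per domain instead of the bridge transmitting a copy per port. Conceptually (field and helper names below are illustrative, not the ones in this series):

    /* one software copy per hardware domain: skip the port if its
     * domain is already marked on the skb */
    static bool sw_should_xmit(unsigned long fwd_hwdoms, int port_hwdom)
    {
            return !(fwd_hwdoms & BIT(port_hwdom));
    }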
p->state == BR_STATE_DISABLED) goto drop; + brmctx = &p->br->multicast_ctx; + pmctx = &p->multicast_ctx; state = p->state; if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid, - &state)) + &state, &vlan)) goto out; nbp_switchdev_frame_mark(p, skb); @@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb local_rcv = true; } else { pkt_type = BR_PKT_MULTICAST; - if (br_multicast_rcv(br, p, skb, vid)) + if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid)) goto drop; } } @@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb switch (pkt_type) { case BR_PKT_MULTICAST: - mdst = br_mdb_get(br, skb, vid); + mdst = br_mdb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb), mdst)) { + br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) { if ((mdst && mdst->host_joined) || - br_multicast_is_router(br, skb)) { + br_multicast_is_router(brmctx, skb)) { local_rcv = true; br->dev->stats.multicast++; } @@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!mcast_hit) br_flood(br, skb, pkt_type, local_rcv, false); else - br_multicast_flood(mdst, skb, local_rcv, false); + br_multicast_flood(mdst, skb, brmctx, local_rcv, false); } if (local_rcv) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 2db800fc27ca..793b0db9d9a3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -106,15 +106,32 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) * This interface is deprecated because it was too difficult * to do the translation for 32/64bit ioctl compatibility. */ -static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p = NULL; unsigned long args[4]; + void __user *argp; int ret = -EOPNOTSUPP; - if (copy_from_user(args, rq->ifr_data, sizeof(args))) - return -EFAULT; + if (in_compat_syscall()) { + unsigned int cargs[4]; + + if (copy_from_user(cargs, data, sizeof(cargs))) + return -EFAULT; + + args[0] = cargs[0]; + args[1] = cargs[1]; + args[2] = cargs[2]; + args[3] = cargs[3]; + + argp = compat_ptr(args[1]); + } else { + if (copy_from_user(args, data, sizeof(args))) + return -EFAULT; + + argp = (void __user *)args[1]; + } switch (args[0]) { case BRCTL_ADD_IF: @@ -171,7 +188,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -ENOMEM; get_port_ifindices(br, indices, num); - if (copy_to_user((void __user *)args[1], indices, num*sizeof(int))) + if (copy_to_user(argp, indices, num * sizeof(int))) num = -EFAULT; kfree(indices); return num; @@ -232,7 +249,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) rcu_read_unlock(); - if (copy_to_user((void __user *)args[1], &p, sizeof(p))) + if (copy_to_user(argp, &p, sizeof(p))) return -EFAULT; return 0; @@ -282,8 +299,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } case BRCTL_GET_FDB_ENTRIES: - return get_fdb_entries(br, (void __user *)args[1], - args[2], args[3]); + return get_fdb_entries(br, argp, args[2], args[3]); } if (!ret) { @@ -320,7 +336,7 @@ static int old_deviceless(struct net *net, void __user *uarg) args[2] = get_bridge_ifindices(net, indices, args[2]); - ret = copy_to_user((void __user *)args[1], indices, 
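The in_compat_syscall() branch above exists because the legacy interface passes unsigned long args[4] through ifr_data, and a 32-bit userspace lays those out as four 32-bit words. The interface itself is the one brctl(8) has always used, roughly:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/if_bridge.h>
    #include <linux/sockios.h>

    /* `brctl addif br0 eth0` equivalent, served by
     * br_dev_siocdevprivate() above; sock is any open socket fd. */
    static int brctl_add_if(int sock, const char *brname, int ifindex)
    {
            unsigned long args[4] = { BRCTL_ADD_IF, ifindex, 0, 0 };
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, brname, IFNAMSIZ - 1);
            ifr.ifr_data = (char *)args;
            return ioctl(sock, SIOCDEVPRIVATE, &ifr);
    }

A 32-bit brctl on a 64-bit kernel lays args out as four u32s, which is exactly what the compat branch re-widens before use.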
args[2]*sizeof(int)) + ret = copy_to_user(uarg, indices, args[2]*sizeof(int)) ? -EFAULT : args[2]; kfree(indices); @@ -350,48 +366,47 @@ static int old_deviceless(struct net *net, void __user *uarg) return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) +int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, + struct ifreq *ifr, void __user *uarg) { + int ret = -EOPNOTSUPP; + + rtnl_lock(); + switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(net, uarg); - + ret = old_deviceless(net, uarg); + break; case SIOCBRADDBR: case SIOCBRDELBR: { char buf[IFNAMSIZ]; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } - if (copy_from_user(buf, uarg, IFNAMSIZ)) - return -EFAULT; + if (copy_from_user(buf, uarg, IFNAMSIZ)) { + ret = -EFAULT; + break; + } buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(net, buf); - - return br_del_bridge(net, buf); - } + ret = br_add_bridge(net, buf); + else + ret = br_del_bridge(net, buf); } - return -EOPNOTSUPP; -} - -int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct net_bridge *br = netdev_priv(dev); - - switch (cmd) { - case SIOCDEVPRIVATE: - return old_dev_ioctl(dev, rq, cmd); - + break; case SIOCBRADDIF: case SIOCBRDELIF: - return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); - + ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); + break; } - br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd); - return -EOPNOTSUPP; + rtnl_unlock(); + + return ret; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 17a720b4473f..73a8915b0148 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -16,29 +16,29 @@ #include "br_private.h" -static bool br_rports_have_mc_router(struct net_bridge *br) +static bool br_rports_have_mc_router(struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) - return !hlist_empty(&br->ip4_mc_router_list) || - !hlist_empty(&br->ip6_mc_router_list); + return !hlist_empty(&brmctx->ip4_mc_router_list) || + !hlist_empty(&brmctx->ip6_mc_router_list); #else - return !hlist_empty(&br->ip4_mc_router_list); + return !hlist_empty(&brmctx->ip4_mc_router_list); #endif } static bool br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) { - *timer = br_timer_value(&port->ip4_mc_router_timer); - return !hlist_unhashed(&port->ip4_rlist); + *timer = br_timer_value(&port->multicast_ctx.ip4_mc_router_timer); + return !hlist_unhashed(&port->multicast_ctx.ip4_rlist); } static bool br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) { #if IS_ENABLED(CONFIG_IPV6) - *timer = br_timer_value(&port->ip6_mc_router_timer); - return !hlist_unhashed(&port->ip6_rlist); + *timer = br_timer_value(&port->multicast_ctx.ip6_mc_router_timer); + return !hlist_unhashed(&port->multicast_ctx.ip6_rlist); #else *timer = 0; return false; @@ -54,10 +54,10 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct nlattr *nest, *port_nest; struct net_bridge_port *p; - if (!br->multicast_router) + if (!br->multicast_ctx.multicast_router) return 0; - if (!br_rports_have_mc_router(br)) + if (!br_rports_have_mc_router(&br->multicast_ctx)) return 0; nest = nla_nest_start_noflag(skb, MDBA_ROUTER); @@ -79,7 +79,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER, max(ip4_timer, 
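br_ioctl_stub() now takes rtnl_lock() itself, which is why br_add_bridge()/br_del_bridge() dropped their own locking in the br_if.c hunk above (and register_netdev() accordingly became register_netdevice()). The deviceless commands it serves are the classic bridge ioctls, e.g.:

    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/sockios.h>

    /* `brctl addbr br0` equivalent, routed through br_ioctl_stub();
     * the kernel copies IFNAMSIZ bytes, so name should point into a
     * buffer at least that large (as brctl itself does). */
    static int add_bridge(const char *name)
    {
            int fd = socket(AF_LOCAL, SOCK_STREAM, 0);
            int ret;

            if (fd < 0)
                    return -1;
            ret = ioctl(fd, SIOCBRADDBR, name);
            close(fd);
            return ret;
    }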
ip6_timer)) || nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE, - p->multicast_router) || + p->multicast_ctx.multicast_router) || (have_ip4_mc_rtr && nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER, ip4_timer)) || @@ -240,7 +240,7 @@ static int __mdb_fill_info(struct sk_buff *skb, switch (mp->addr.proto) { case htons(ETH_P_IP): - dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3); + dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3); if (mp->addr.src.ip4) { if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE, mp->addr.src.ip4)) @@ -250,7 +250,7 @@ static int __mdb_fill_info(struct sk_buff *skb, break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - dump_srcs_mode = !!(mp->br->multicast_mld_version == 2); + dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2); if (!ipv6_addr_any(&mp->addr.src.ip6)) { if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE, &mp->addr.src.ip6)) @@ -483,7 +483,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) /* MDBA_MDB_EATTR_SOURCE */ if (pg->key.addr.src.ip4) nlmsg_size += nla_total_size(sizeof(__be32)); - if (pg->key.port->br->multicast_igmp_version == 2) + if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2) goto out; addr_size = sizeof(__be32); break; @@ -492,7 +492,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) /* MDBA_MDB_EATTR_SOURCE */ if (!ipv6_addr_any(&pg->key.addr.src.ip6)) nlmsg_size += nla_total_size(sizeof(struct in6_addr)); - if (pg->key.port->br->multicast_mld_version == 1) + if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1) goto out; addr_size = sizeof(struct in6_addr); break; @@ -617,6 +617,9 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, ASSERT_RTNL(); + if (!nb) + return 0; + if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) return -EINVAL; @@ -686,7 +689,6 @@ out_free_mdb: return err; } -EXPORT_SYMBOL_GPL(br_mdb_replay); static void br_mdb_switchdev_host_port(struct net_device *dev, struct net_device *lower_dev, @@ -781,12 +783,12 @@ errout: static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct net_device *dev, - int ifindex, u32 pid, + int ifindex, u16 vid, u32 pid, u32 seq, int type, unsigned int flags) { + struct nlattr *nest, *port_nest; struct br_port_msg *bpm; struct nlmsghdr *nlh; - struct nlattr *nest; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) @@ -800,8 +802,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, if (!nest) goto cancel; - if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex)) + port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); + if (!port_nest) + goto end; + if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) { + nla_nest_cancel(skb, port_nest); goto end; + } + if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) { + nla_nest_cancel(skb, port_nest); + goto end; + } + nla_nest_end(skb, port_nest); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -817,23 +829,28 @@ cancel: static inline size_t rtnl_rtr_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) - + nla_total_size(sizeof(__u32)); + + nla_total_size(sizeof(__u32)) + + nla_total_size(sizeof(u16)); } -void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, +void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; int ifindex; + u16 vid; - ifindex = port ? port->dev->ifindex : 0; + ifindex = pmctx ? 
pmctx->port->dev->ifindex : 0; + vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid : + 0; skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; - err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF); + err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type, + NTF_SELF); if (err < 0) { kfree_skb(skb); goto errout; @@ -1004,14 +1021,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static struct net_bridge_mcast * +__br_mdb_choose_context(struct net_bridge *br, + const struct br_mdb_entry *entry, + struct netlink_ext_ack *extack) +{ + struct net_bridge_mcast *brmctx = NULL; + struct net_bridge_vlan *v; + + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + brmctx = &br->multicast_ctx; + goto out; + } + + if (!entry->vid) { + NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled"); + goto out; + } + + v = br_vlan_find(br_vlan_group(br), entry->vid); + if (!v) { + NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured"); + goto out; + } + if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled"); + goto out; + } + brmctx = &v->br_mcast_ctx; +out: + return brmctx; +} + static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_mdb_entry *entry, struct nlattr **mdb_attrs, struct netlink_ext_ack *extack) { struct net_bridge_mdb_entry *mp, *star_mp; - struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mcast *brmctx; struct br_ip group, star_group; unsigned long now = jiffies; unsigned char flags = 0; @@ -1020,6 +1070,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, __mdb_entry_to_br_ip(entry, &group, mdb_attrs); + brmctx = __br_mdb_choose_context(br, entry, extack); + if (!brmctx) + return -EINVAL; + /* host join errors which can happen before creating the group */ if (!port) { /* don't allow any flags for host-joined groups */ @@ -1053,7 +1107,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -EEXIST; } - br_multicast_host_join(mp, false); + br_multicast_host_join(brmctx, mp, false); br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB); return 0; @@ -1084,14 +1138,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } rcu_assign_pointer(*pp, p); if (entry->state == MDB_TEMPORARY) - mod_timer(&p->timer, now + br->multicast_membership_interval); + mod_timer(&p->timer, + now + brmctx->multicast_membership_interval); br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); /* if we are adding a new EXCLUDE port group (*,G) it needs to be also * added to all S,G entries for proper replication, if we are adding * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be * added to it for proper replication */ - if (br_multicast_should_handle_mode(br, group.proto)) { + if (br_multicast_should_handle_mode(brmctx, group.proto)) { switch (filter_mode) { case MCAST_EXCLUDE: br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d0434dc8c03b..470f1ec3b579 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -49,30 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = { .automatic_shrinking = true, }; -static void br_multicast_start_querier(struct net_bridge *br, +static 
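__br_mdb_choose_context() makes the vid mandatory for MDB entries once vlan snooping is enabled, since every vlan then owns its own multicast context and timers. For illustration (iproute2 syntax; the port name swp1 is just an example):

    ip link set dev br0 type bridge mcast_vlan_snooping 1
    # a vid-less add is now rejected with the extack message above
    bridge mdb add dev br0 port swp1 grp 239.1.1.1 vid 10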
void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query); -static void br_ip4_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port); -static void br_ip4_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx); +static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src); static void br_multicast_port_group_rexmit(struct timer_list *t); static void -br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted); -static void br_ip6_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port); +br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted); +static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx); #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src); #endif static struct net_bridge_port_group * -__br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +__br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -80,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br, bool blocked); static void br_multicast_find_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg); +static void __br_multicast_stop(struct net_bridge_mcast *brmctx); static struct net_bridge_port_group * br_sg_port_find(struct net_bridge *br, @@ -140,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br, } #endif -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge *br = brmctx->br; struct br_ip ip; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || + br_multicast_ctx_vlan_global_disabled(brmctx)) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) @@ -158,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, switch (skb->protocol) { case htons(ETH_P_IP): ip.dst.ip4 = ip_hdr(skb)->daddr; - if (br->multicast_igmp_version == 3) { + if (brmctx->multicast_igmp_version == 3) { struct net_bridge_mdb_entry *mdb; ip.src.ip4 = ip_hdr(skb)->saddr; @@ -171,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ip.dst.ip6 = ipv6_hdr(skb)->daddr; - if (br->multicast_mld_version == 2) { + if (brmctx->multicast_mld_version == 2) { struct net_bridge_mdb_entry *mdb; ip.src.ip6 = ipv6_hdr(skb)->saddr; @@ -190,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return br_mdb_ip_get_rcu(br, &ip); } +/* IMPORTANT: this function must be used only when the contexts cannot be + * passed down (e.g. timer) and must be used for read-only purposes because + * the vlan snooping option can change, so it can return any context + * (non-vlan or vlan). 
Its initial intended purpose is to read timer values + * from the *current* context based on the option. At worst that could lead + * to inconsistent timers when the contexts are changed, i.e. src timer + * which needs to re-arm with a specific delay taken from the old context + */ +static struct net_bridge_mcast_port * +br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg) +{ + struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx; + struct net_bridge_vlan *vlan; + + lockdep_assert_held_once(&pg->key.port->br->multicast_lock); + + /* if vlan snooping is disabled use the port's multicast context */ + if (!pg->key.addr.vid || + !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + goto out; + + /* locking is tricky here, due to different rules for multicast and + * vlans we need to take rcu to find the vlan and make sure it has + * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under + * multicast_lock which must be already held here, so the vlan's pmctx + * can safely be used on return + */ + rcu_read_lock(); + vlan = br_vlan_find(nbp_vlan_group(pg->key.port), pg->key.addr.vid); + if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) + pmctx = &vlan->port_mcast_ctx; + else + pmctx = NULL; + rcu_read_unlock(); +out: + return pmctx; +} + +/* when snooping we need to check if the contexts should be used + * in the following order: + * - if pmctx is non-NULL (port), check if it should be used + * - if pmctx is NULL (bridge), check if brmctx should be used + */ +static bool +br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx, + const struct net_bridge_mcast_port *pmctx) +{ + if (!netif_running(brmctx->br->dev)) + return false; + + if (pmctx) + return !br_multicast_port_ctx_state_disabled(pmctx); + else + return !br_multicast_ctx_vlan_disabled(brmctx); +} + static bool br_port_group_equal(struct net_bridge_port_group *p, struct net_bridge_port *port, const unsigned char *src) @@ -203,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p, return ether_addr_equal(src, p->eth_addr); } -static void __fwd_add_star_excl(struct net_bridge_port_group *pg, +static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, struct br_ip *sg_ip) { struct net_bridge_port_group_sg_key sg_key; - struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *src_pg; + struct net_bridge_mcast *brmctx; memset(&sg_key, 0, sizeof(sg_key)); + brmctx = br_multicast_port_ctx_get_global(pmctx); sg_key.port = pg->key.port; sg_key.addr = *sg_ip; - if (br_sg_port_find(br, &sg_key)) + if (br_sg_port_find(brmctx->br, &sg_key)) return; - src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr, + src_pg = __br_multicast_add_group(brmctx, pmctx, + sg_ip, pg->eth_addr, MCAST_INCLUDE, false, false); if (IS_ERR_OR_NULL(src_pg) || src_pg->rt_protocol != RTPROT_KERNEL) @@ -256,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, { struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *pg_lst; + struct net_bridge_mcast_port *pmctx; struct net_bridge_mdb_entry *mp; struct br_ip sg_ip; @@ -265,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, mp = br_mdb_ip_get(br, &pg->key.addr); if (!mp) return; + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + return; memset(&sg_ip, 0, sizeof(sg_ip)); sg_ip = pg->key.addr; + for (pg_lst = mlock_dereference(mp->ports, br); pg_lst; pg_lst = 
mlock_dereference(pg_lst->next, br)) { @@ -284,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, __fwd_del_star_excl(pg, &sg_ip); break; case MCAST_EXCLUDE: - __fwd_add_star_excl(pg, &sg_ip); + __fwd_add_star_excl(pmctx, pg, &sg_ip); break; } } @@ -377,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, { struct net_bridge_port_group_sg_key sg_key; struct net_bridge *br = star_mp->br; + struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *pg; + struct net_bridge_mcast *brmctx; if (WARN_ON(br_multicast_is_star_g(&sg->key.addr))) return; @@ -400,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, if (br_sg_port_find(br, &sg_key)) continue; - src_pg = __br_multicast_add_group(br, pg->key.port, + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + continue; + brmctx = br_multicast_port_ctx_get_global(pmctx); + + src_pg = __br_multicast_add_group(brmctx, pmctx, &sg->key.addr, sg->eth_addr, MCAST_INCLUDE, false, false); @@ -414,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) { struct net_bridge_mdb_entry *star_mp; + struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *sg; + struct net_bridge_mcast *brmctx; struct br_ip sg_ip; if (src->flags & BR_SGRP_F_INSTALLED) return; memset(&sg_ip, 0, sizeof(sg_ip)); + pmctx = br_multicast_pg_to_port_ctx(src->pg); + if (!pmctx) + return; + brmctx = br_multicast_port_ctx_get_global(pmctx); sg_ip = src->pg->key.addr; sg_ip.src = src->addr.src; - sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip, + + sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip, src->pg->eth_addr, MCAST_INCLUDE, false, !timer_pending(&src->timer)); if (IS_ERR_OR_NULL(sg)) @@ -692,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head) } } -static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, +static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct sk_buff *skb) +{ + struct net_bridge_vlan *vlan = NULL; + + if (pmctx && br_multicast_port_ctx_is_vlan(pmctx)) + vlan = pmctx->vlan; + else if (br_multicast_ctx_is_vlan(brmctx)) + vlan = brmctx->vlan; + + if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) { + u16 vlan_proto; + + if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0) + return; + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid); + } +} + +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, __be32 ip_dst, __be32 group, bool with_srcs, bool over_lmqt, @@ -714,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, u16 lmqt_srcs = 0; igmp_hdr_size = sizeof(*ih); - if (br->multicast_igmp_version == 3) { + if (brmctx->multicast_igmp_version == 3) { igmp_hdr_size = sizeof(*ihv3); if (pg && with_srcs) { - lmqt = now + (br->multicast_last_member_interval * - br->multicast_last_member_count); + lmqt = now + (brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_lmqt == time_after(ent->timer.expires, lmqt) && @@ -734,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size; if ((p && pkt_size > 
p->dev->mtu) || - pkt_size > br->dev->mtu) + pkt_size > brmctx->br->dev->mtu) return NULL; - skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; + __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IP); skb_reset_mac_header(skb); eth = eth_hdr(skb); - ether_addr_copy(eth->h_source, br->dev->dev_addr); + ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); ip_eth_mc_map(ip_dst, eth->h_dest); eth->h_proto = htons(ETH_P_IP); skb_put(skb, sizeof(*eth)); @@ -762,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? - inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; + iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? + inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = ip_dst; ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -775,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; - switch (br->multicast_igmp_version) { + switch (brmctx->multicast_igmp_version) { case 2: ih = igmp_hdr(skb); ih->type = IGMP_HOST_MEMBERSHIP_QUERY; - ih->code = (group ? br->multicast_last_member_interval : - br->multicast_query_response_interval) / + ih->code = (group ? brmctx->multicast_last_member_interval : + brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ih->group = group; ih->csum = 0; @@ -790,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, case 3: ihv3 = igmpv3_query_hdr(skb); ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; - ihv3->code = (group ? br->multicast_last_member_interval : - br->multicast_query_response_interval) / + ihv3->code = (group ? 
brmctx->multicast_last_member_interval : + brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ihv3->group = group; - ihv3->qqic = br->multicast_query_interval / HZ; + ihv3->qqic = brmctx->multicast_query_interval / HZ; ihv3->nsrcs = htons(lmqt_srcs); ihv3->resv = 0; ihv3->suppress = sflag; @@ -837,7 +940,8 @@ out: } #if IS_ENABLED(CONFIG_IPV6) -static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, const struct in6_addr *ip6_dst, const struct in6_addr *group, @@ -862,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, u8 *hopopt; mld_hdr_size = sizeof(*mldq); - if (br->multicast_mld_version == 2) { + if (brmctx->multicast_mld_version == 2) { mld_hdr_size = sizeof(*mld2q); if (pg && with_srcs) { - llqt = now + (br->multicast_last_member_interval * - br->multicast_last_member_count); + llqt = now + (brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_llqt == time_after(ent->timer.expires, llqt) && @@ -882,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size; if ((p && pkt_size > p->dev->mtu) || - pkt_size > br->dev->mtu) + pkt_size > brmctx->br->dev->mtu) return NULL; - skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; + __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IPV6); /* Ethernet header */ skb_reset_mac_header(skb); eth = eth_hdr(skb); - ether_addr_copy(eth->h_source, br->dev->dev_addr); + ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -908,14 +1013,14 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ip6h->daddr = *ip6_dst; - if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, - &ip6h->saddr)) { + if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev, + &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false); + br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false); return NULL; } - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -933,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, /* ICMPv6 */ skb_set_transport_header(skb, skb->len); interval = ipv6_addr_any(group) ? 
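The max-response-code arithmetic above is plain unit conversion from jiffies: IGMP carries the value in units of 1/IGMP_TIMER_SCALE (= 1/10) of a second, and qqic in whole seconds. With the bridge defaults (assumed here: 10 s query response interval, 125 s query interval) that works out to:

    code = (10 * HZ) / (HZ / IGMP_TIMER_SCALE)   /* = 100, i.e. 10.0 s */
    qqic = (125 * HZ) / HZ                       /* = 125 s */

The MLD path below follows the same pattern; only the on-wire unit differs, since MLD expresses the maximum response delay in milliseconds.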
- br->multicast_query_response_interval : - br->multicast_last_member_interval; + brmctx->multicast_query_response_interval : + brmctx->multicast_last_member_interval; *igmp_type = ICMPV6_MGM_QUERY; - switch (br->multicast_mld_version) { + switch (brmctx->multicast_mld_version) { case 1: mldq = (struct mld_msg *)icmp6_hdr(skb); mldq->mld_type = ICMPV6_MGM_QUERY; @@ -959,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, mld2q->mld2q_suppress = sflag; mld2q->mld2q_qrv = 2; mld2q->mld2q_nsrcs = htons(llqt_srcs); - mld2q->mld2q_qqic = br->multicast_query_interval / HZ; + mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ; mld2q->mld2q_mca = *group; csum = &mld2q->mld2q_cksum; csum_start = (void *)mld2q; @@ -1000,7 +1105,8 @@ out: } #endif -static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, +static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, @@ -1013,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, switch (group->proto) { case htons(ETH_P_IP): ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP); - return br_ip4_multicast_alloc_query(br, pg, + return br_ip4_multicast_alloc_query(brmctx, pmctx, pg, ip4_dst, group->dst.ip4, with_srcs, over_lmqt, sflag, igmp_type, @@ -1028,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0, htonl(1)); - return br_ip6_multicast_alloc_query(br, pg, + return br_ip6_multicast_alloc_query(brmctx, pmctx, pg, &ip6_dst, &group->dst.ip6, with_srcs, over_lmqt, sflag, igmp_type, @@ -1206,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( return p; } -void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) +void br_multicast_host_join(const struct net_bridge_mcast *brmctx, + struct net_bridge_mdb_entry *mp, bool notify) { if (!mp->host_joined) { mp->host_joined = true; @@ -1219,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) if (br_group_is_l2(&mp->addr)) return; - mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); + mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval); } void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) @@ -1235,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) } static struct net_bridge_port_group * -__br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +__br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -1248,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; unsigned long now = jiffies; - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; - mp = br_multicast_new_group(br, group); + mp = br_multicast_new_group(brmctx->br, group); if (IS_ERR(mp)) return ERR_CAST(mp); - if (!port) { - br_multicast_host_join(mp, true); + if (!pmctx) { + br_multicast_host_join(brmctx, mp, true); goto out; } for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { - if (br_port_group_equal(p, port, src)) + if 
(br_port_group_equal(p, pmctx->port, src)) goto found; - if ((unsigned long)p->key.port < (unsigned long)port) + if ((unsigned long)p->key.port < (unsigned long)pmctx->port) break; } - p = br_multicast_new_port_group(port, group, *pp, 0, src, + p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src, filter_mode, RTPROT_KERNEL); if (unlikely(!p)) { p = ERR_PTR(-ENOMEM); @@ -1279,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); if (blocked) p->flags |= MDB_PG_FLAGS_BLOCKED; - br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB); found: if (igmpv2_mldv1) - mod_timer(&p->timer, now + br->multicast_membership_interval); + mod_timer(&p->timer, + now + brmctx->multicast_membership_interval); out: return p; } -static int br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -1299,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port_group *pg; int err; - spin_lock(&br->multicast_lock); - pg = __br_multicast_add_group(br, port, group, src, filter_mode, + spin_lock(&brmctx->br->multicast_lock); + pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ err = PTR_ERR_OR_ZERO(pg); - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); return err; } -static int br_ip4_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src, @@ -1328,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.vid = vid; filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src, filter_mode, - igmpv2); + return br_multicast_add_group(brmctx, pmctx, &br_group, src, + filter_mode, igmpv2); } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src, @@ -1352,8 +1459,8 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.vid = vid; filter_mode = mldv1 ? 
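The re-arm above uses the context's membership interval; with bridge defaults it matches the RFC 2236 group membership interval formula:

    membership_interval = robustness * query_interval + response_interval
                        = 2 * 125 s + 10 s = 260 s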
MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src, filter_mode, - mldv1); + return br_multicast_add_group(brmctx, pmctx, &br_group, src, + filter_mode, mldv1); } #endif @@ -1366,52 +1473,54 @@ static bool br_multicast_rport_del(struct hlist_node *rlist) return true; } -static bool br_ip4_multicast_rport_del(struct net_bridge_port *p) +static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { - return br_multicast_rport_del(&p->ip4_rlist); + return br_multicast_rport_del(&pmctx->ip4_rlist); } -static bool br_ip6_multicast_rport_del(struct net_bridge_port *p) +static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - return br_multicast_rport_del(&p->ip6_rlist); + return br_multicast_rport_del(&pmctx->ip6_rlist); #else return false; #endif } -static void br_multicast_router_expired(struct net_bridge_port *port, +static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx, struct timer_list *t, struct hlist_node *rlist) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; bool del; spin_lock(&br->multicast_lock); - if (port->multicast_router == MDB_RTR_TYPE_DISABLED || - port->multicast_router == MDB_RTR_TYPE_PERM || + if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + pmctx->multicast_router == MDB_RTR_TYPE_PERM || timer_pending(t)) goto out; del = br_multicast_rport_del(rlist); - br_multicast_rport_del_notify(port, del); + br_multicast_rport_del_notify(pmctx, del); out: spin_unlock(&br->multicast_lock); } static void br_ip4_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip4_mc_router_timer); - br_multicast_router_expired(port, t, &port->ip4_rlist); + br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip6_mc_router_timer); - br_multicast_router_expired(port, t, &port->ip6_rlist); + br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist); } #endif @@ -1428,80 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p, switchdev_port_attr_set(p->dev, &attr, NULL); } -static void br_multicast_local_router_expired(struct net_bridge *br, +static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx, struct timer_list *timer) { - spin_lock(&br->multicast_lock); - if (br->multicast_router == MDB_RTR_TYPE_DISABLED || - br->multicast_router == MDB_RTR_TYPE_PERM || - br_ip4_multicast_is_router(br) || - br_ip6_multicast_is_router(br)) + spin_lock(&brmctx->br->multicast_lock); + if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + brmctx->multicast_router == MDB_RTR_TYPE_PERM || + br_ip4_multicast_is_router(brmctx) || + br_ip6_multicast_is_router(brmctx)) goto out; - br_mc_router_state_change(br, false); + br_mc_router_state_change(brmctx->br, false); out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_local_router_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_mc_router_timer); - br_multicast_local_router_expired(br, t); + 
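All the expiry callbacks in this refactor follow the same recipe: the timer moves from struct net_bridge / net_bridge_port into the multicast context, and from_timer() (a container_of() wrapper) recovers the enclosing context. The generic pattern, reduced to a sketch:

    struct foo_ctx {                    /* stand-in for the mcast ctx */
            struct timer_list my_timer;
    };

    static void foo_expired(struct timer_list *t)
    {
            struct foo_ctx *ctx = from_timer(ctx, t, my_timer);
            /* act on ctx under the appropriate lock */
    }

    /* init:     timer_setup(&ctx->my_timer, foo_expired, 0);
     * teardown: del_timer_sync(&ctx->my_timer);
     */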
br_multicast_local_router_expired(brmctx, t); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_local_router_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_mc_router_timer); - br_multicast_local_router_expired(br, t); + br_multicast_local_router_expired(brmctx, t); } #endif -static void br_multicast_querier_expired(struct net_bridge *br, +static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!netif_running(brmctx->br->dev) || + br_multicast_ctx_vlan_global_disabled(brmctx) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) goto out; - br_multicast_start_querier(br, query); + br_multicast_start_querier(brmctx, query); out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_other_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_other_query.timer); - br_multicast_querier_expired(br, &br->ip4_own_query); + br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_other_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_other_query.timer); - br_multicast_querier_expired(br, &br->ip6_own_query); + br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query); } #endif -static void br_multicast_select_own_querier(struct net_bridge *br, +static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) { if (ip->proto == htons(ETH_P_IP)) - br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; + brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; #if IS_ENABLED(CONFIG_IPV6) else - br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; + brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; #endif } -static void __br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, +static void __br_multicast_send_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, @@ -1513,19 +1628,22 @@ static void __br_multicast_send_query(struct net_bridge *br, struct sk_buff *skb; u8 igmp_type; + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + return; + again_under_lmqt: - skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs, - over_lmqt, sflag, &igmp_type, + skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group, + with_srcs, over_lmqt, sflag, &igmp_type, need_rexmit); if (!skb) return; - if (port) { - skb->dev = port->dev; - br_multicast_count(br, port, skb, igmp_type, + if (pmctx) { + skb->dev = pmctx->port->dev; + br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type, BR_MCAST_DIR_TX); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, - dev_net(port->dev), NULL, skb, NULL, skb->dev, + dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); if (over_lmqt && with_srcs && sflag) { @@ -1533,35 +1651,35 @@ again_under_lmqt: goto again_under_lmqt; } } else { - br_multicast_select_own_querier(br, group, skb); - 
br_multicast_count(br, port, skb, igmp_type, + br_multicast_select_own_querier(brmctx, group, skb); + br_multicast_count(brmctx->br, NULL, skb, igmp_type, BR_MCAST_DIR_RX); netif_rx(skb); } } -static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_send_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *own_query) { struct bridge_mcast_other_query *other_query = NULL; struct br_ip br_group; unsigned long time; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED) || - !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + if (!br_multicast_ctx_should_use(brmctx, pmctx) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) return; memset(&br_group.dst, 0, sizeof(br_group.dst)); - if (port ? (own_query == &port->ip4_own_query) : - (own_query == &br->ip4_own_query)) { - other_query = &br->ip4_other_query; + if (pmctx ? (own_query == &pmctx->ip4_own_query) : + (own_query == &brmctx->ip4_own_query)) { + other_query = &brmctx->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif } @@ -1569,31 +1687,32 @@ static void br_multicast_send_query(struct net_bridge *br, if (!other_query || timer_pending(&other_query->timer)) return; - __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0, - NULL); + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false, + 0, NULL); time = jiffies; - time += own_query->startup_sent < br->multicast_startup_query_count ? - br->multicast_startup_query_interval : - br->multicast_query_interval; + time += own_query->startup_sent < brmctx->multicast_startup_query_count ? 
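This re-arm implements the RFC-style startup behaviour: the first multicast_startup_query_count queries go out at the shorter startup interval, after which the steady-state interval takes over. With the defaults (assumed: count 2, startup interval = query interval / 4):

    startup_sent < 2 : next query in 125 s / 4 = 31.25 s
    otherwise        : next query in 125 s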
+ brmctx->multicast_startup_query_interval : + brmctx->multicast_query_interval; mod_timer(&own_query->timer, time); } static void -br_multicast_port_query_expired(struct net_bridge_port *port, +br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *query) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; + struct net_bridge_mcast *brmctx; spin_lock(&br->multicast_lock); - if (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING) + if (br_multicast_port_ctx_state_stopped(pmctx)) goto out; - if (query->startup_sent < br->multicast_startup_query_count) + brmctx = br_multicast_port_ctx_get_global(pmctx); + if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; - br_multicast_send_query(port->br, port, query); + br_multicast_send_query(brmctx, pmctx, query); out: spin_unlock(&br->multicast_lock); @@ -1601,17 +1720,19 @@ out: static void br_ip4_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip4_own_query.timer); - br_multicast_port_query_expired(port, &port->ip4_own_query); + br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip6_own_query.timer); - br_multicast_port_query_expired(port, &port->ip6_own_query); + br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query); } #endif @@ -1620,6 +1741,8 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer); struct bridge_mcast_other_query *other_query = NULL; struct net_bridge *br = pg->key.port->br; + struct net_bridge_mcast_port *pmctx; + struct net_bridge_mcast *brmctx; bool need_rexmit = false; spin_lock(&br->multicast_lock); @@ -1628,11 +1751,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) !br_opt_get(br, BROPT_MULTICAST_QUERIER)) goto out; + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + goto out; + brmctx = br_multicast_port_ctx_get_global(pmctx); if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif if (!other_query || timer_pending(&other_query->timer)) @@ -1640,15 +1767,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) if (pg->grp_query_rexmit_cnt) { pg->grp_query_rexmit_cnt--; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, false, 1, NULL); } - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 0, &need_rexmit); if (pg->grp_query_rexmit_cnt || need_rexmit) mod_timer(&pg->rexmit_timer, jiffies + - br->multicast_last_member_interval); + brmctx->multicast_last_member_interval); out: spin_unlock(&br->multicast_lock); } @@ -1666,23 +1793,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value, return switchdev_port_attr_set(dev, &attr, extack); } -int br_multicast_add_port(struct net_bridge_port *port) +void 
br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx) { - int err; - - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; - - timer_setup(&port->ip4_mc_router_timer, + pmctx->port = port; + pmctx->vlan = vlan; + pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + timer_setup(&pmctx->ip4_mc_router_timer, br_ip4_multicast_router_expired, 0); - timer_setup(&port->ip4_own_query.timer, + timer_setup(&pmctx->ip4_own_query.timer, br_ip4_multicast_port_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - timer_setup(&port->ip6_mc_router_timer, + timer_setup(&pmctx->ip6_mc_router_timer, br_ip6_multicast_router_expired, 0); - timer_setup(&port->ip6_own_query.timer, + timer_setup(&pmctx->ip6_own_query.timer, br_ip6_multicast_port_query_expired, 0); #endif +} + +void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) +{ +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&pmctx->ip6_mc_router_timer); +#endif + del_timer_sync(&pmctx->ip4_mc_router_timer); +} + +int br_multicast_add_port(struct net_bridge_port *port) +{ + int err; + + port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; + br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx); + err = br_mc_disabled_update(port->dev, br_opt_get(port->br, BROPT_MULTICAST_ENABLED), @@ -1711,10 +1855,7 @@ void br_multicast_del_port(struct net_bridge_port *port) hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); br_multicast_gc(&deleted_head); - del_timer_sync(&port->ip4_mc_router_timer); -#if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&port->ip6_mc_router_timer); -#endif + br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } @@ -1727,20 +1868,23 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -static void __br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; + struct net_bridge_mcast *brmctx; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev)) + brmctx = br_multicast_port_ctx_get_global(pmctx); + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || + !netif_running(br->dev)) return; - br_multicast_enable(&port->ip4_own_query); + br_multicast_enable(&pmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_enable(&port->ip6_own_query); + br_multicast_enable(&pmctx->ip6_own_query); #endif - if (port->multicast_router == MDB_RTR_TYPE_PERM) { - br_ip4_multicast_add_router(br, port); - br_ip6_multicast_add_router(br, port); + if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) { + br_ip4_multicast_add_router(brmctx, pmctx); + br_ip6_multicast_add_router(brmctx, pmctx); } } @@ -1748,33 +1892,39 @@ void br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); - __br_multicast_enable_port(port); - spin_unlock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); + __br_multicast_enable_port_ctx(&port->multicast_ctx); + spin_unlock_bh(&br->multicast_lock); } -void br_multicast_disable_port(struct net_bridge_port *port) +static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; struct net_bridge_port_group *pg; struct hlist_node *n; bool del = false; - 
spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) - if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) - br_multicast_find_del_pg(br, pg); + hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist) + if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) && + (!br_multicast_port_ctx_is_vlan(pmctx) || + pg->key.addr.vid == pmctx->vlan->vid)) + br_multicast_find_del_pg(pmctx->port->br, pg); - del |= br_ip4_multicast_rport_del(port); - del_timer(&port->ip4_mc_router_timer); - del_timer(&port->ip4_own_query.timer); - del |= br_ip6_multicast_rport_del(port); + del |= br_ip4_multicast_rport_del(pmctx); + del_timer(&pmctx->ip4_mc_router_timer); + del_timer(&pmctx->ip4_own_query.timer); + del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&port->ip6_mc_router_timer); - del_timer(&port->ip6_own_query.timer); + del_timer(&pmctx->ip6_mc_router_timer); + del_timer(&pmctx->ip6_own_query.timer); #endif - br_multicast_rport_del_notify(port, del); - spin_unlock(&br->multicast_lock); + br_multicast_rport_del_notify(pmctx, del); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + spin_lock_bh(&port->br->multicast_lock); + __br_multicast_disable_port_ctx(&port->multicast_ctx); + spin_unlock_bh(&port->br->multicast_lock); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) @@ -1799,31 +1949,33 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src, br_multicast_fwd_src_handle(src); } -static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) +static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; - struct net_bridge *br = pg->key.port->br; - u32 lmqc = br->multicast_last_member_count; + u32 lmqc = brmctx->multicast_last_member_count; unsigned long lmqt, lmi, now = jiffies; struct net_bridge_group_src *ent; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!netif_running(brmctx->br->dev) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif - lmqt = now + br_multicast_lmqt(br); + lmqt = now + br_multicast_lmqt(brmctx); hlist_for_each_entry(ent, &pg->src_list, node) { if (ent->flags & BR_SGRP_F_SEND) { ent->flags &= ~BR_SGRP_F_SEND; if (ent->timer.expires > lmqt) { - if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + if (br_opt_get(brmctx->br, + BROPT_MULTICAST_QUERIER) && other_query && !timer_pending(&other_query->timer)) ent->src_query_rexmit_cnt = lmqc; @@ -1832,41 +1984,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) } } - if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) || + if (!br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) || !other_query || timer_pending(&other_query->timer)) return; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 1, NULL); - lmi = now + br->multicast_last_member_interval; + lmi = now + brmctx->multicast_last_member_interval; if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) mod_timer(&pg->rexmit_timer, lmi); } -static void __grp_send_query_and_rexmit(struct 
net_bridge_port_group *pg) +static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; - struct net_bridge *br = pg->key.port->br; unsigned long now = jiffies, lmi; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!netif_running(brmctx->br->dev) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif - if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) && other_query && !timer_pending(&other_query->timer)) { - lmi = now + br->multicast_last_member_interval; - pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + lmi = now + brmctx->multicast_last_member_interval; + pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1; + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, false, 0, NULL); if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) @@ -1875,8 +2028,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) if (pg->filter_mode == MCAST_EXCLUDE && (!timer_pending(&pg->timer) || - time_after(pg->timer.expires, now + br_multicast_lmqt(br)))) - mod_timer(&pg->timer, now + br_multicast_lmqt(br)); + time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx)))) + mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx)); } /* State Msg type New state Actions @@ -1884,11 +2037,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ -static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; @@ -1907,10 +2060,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; return changed; @@ -1921,7 +2075,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a * Delete (A-B) * Group Timer=GMI */ -static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, +static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -1945,7 +2100,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } - br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type); 
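/* Note on the step below: every source entry still carrying the delete
 * mark at this point is one the report did not list, i.e. the (A-B) set
 * from the transition table above, so dropping them completes
 * INCLUDE (A) + IS_EX (B) -> EXCLUDE (A*B, B-A).
 */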
__grp_src_delete_marked(pg); } @@ -1956,11 +2112,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, * Delete (Y-A) * Group Timer=GMI */ -static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; @@ -1981,13 +2137,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) { __grp_src_mod_timer(ent, - now + br_multicast_gmi(br)); + now + br_multicast_gmi(brmctx)); changed = true; } } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (__grp_src_delete_marked(pg)) @@ -1996,28 +2153,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, return changed; } -static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size, + __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs, + addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; - mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } @@ -2026,11 +2183,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ -static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; @@ -2054,14 +2212,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, changed = true; } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } @@ -2071,11 +2230,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,X-A) * Send Q(G) */ -static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool 
__grp_src_toin_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; @@ -2102,21 +2262,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, changed = true; } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); - __grp_send_query_and_rexmit(pg); + __grp_send_query_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_toin(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2124,12 +2287,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } @@ -2151,7 +2314,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A*B) * Group Timer=GMI */ -static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, +static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2178,11 +2343,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } - br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type); __grp_src_delete_marked(pg); if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); } /* State Msg type New state Actions @@ -2192,7 +2358,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A-Y) * Group Timer=GMI */ -static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2224,39 +2392,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (__grp_src_delete_marked(pg)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + 
__grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_toex(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, + addr_size, grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; - mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } @@ -2264,7 +2434,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; @@ -2286,11 +2458,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } @@ -2299,7 +2472,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer * Send Q(G,A-Y) */ -static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; @@ -2328,28 +2503,31 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_block(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: - changed = 
__grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } @@ -2384,12 +2562,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp, return NULL; } -static int br_ip4_multicast_igmp3_report(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { - bool igmpv2 = br->multicast_igmp_version == 2; + bool igmpv2 = brmctx->multicast_igmp_version == 2; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; const unsigned char *src; @@ -2436,25 +2614,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, if (nsrcs == 0 && (type == IGMPV3_CHANGE_TO_INCLUDE || type == IGMPV3_MODE_IS_INCLUDE)) { - if (!port || igmpv2) { - br_ip4_multicast_leave_group(br, port, group, vid, src); + if (!pmctx || igmpv2) { + br_ip4_multicast_leave_group(brmctx, pmctx, + group, vid, src); continue; } } else { - err = br_ip4_multicast_add_group(br, port, group, vid, - src, igmpv2); + err = br_ip4_multicast_add_group(brmctx, pmctx, group, + vid, src, igmpv2); if (err) break; } - if (!port || igmpv2) + if (!pmctx || igmpv2) continue; - spin_lock_bh(&br->multicast_lock); - mdst = br_mdb_ip4_get(br, group, vid); + spin_lock_bh(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + goto unlock_continue; + + mdst = br_mdb_ip4_get(brmctx->br, group, vid); if (!mdst) goto unlock_continue; - pg = br_multicast_find_port(mdst, port, src); + pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; /* reload grec and host addr */ @@ -2462,46 +2644,52 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, h_addr, grec->grec_src, + changed = br_multicast_isexc(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, h_addr, grec->grec_src, + changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, h_addr, grec->grec_src, + changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, h_addr, grec->grec_src, + changed = br_multicast_block(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; } if (changed) - br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); } return err; } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_mld2_report(struct net_bridge *br, - struct net_bridge_port *port, +static int 
br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { - bool mldv1 = br->multicast_mld_version == 1; + bool mldv1 = brmctx->multicast_mld_version == 1; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; unsigned int nsrcs_offset; @@ -2562,137 +2750,144 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && nsrcs == 0) { - if (!port || mldv1) { - br_ip6_multicast_leave_group(br, port, + if (!pmctx || mldv1) { + br_ip6_multicast_leave_group(brmctx, pmctx, &grec->grec_mca, vid, src); continue; } } else { - err = br_ip6_multicast_add_group(br, port, + err = br_ip6_multicast_add_group(brmctx, pmctx, &grec->grec_mca, vid, src, mldv1); if (err) break; } - if (!port || mldv1) + if (!pmctx || mldv1) continue; - spin_lock_bh(&br->multicast_lock); - mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid); + spin_lock_bh(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + goto unlock_continue; + + mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid); if (!mdst) goto unlock_continue; - pg = br_multicast_find_port(mdst, port, src); + pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case MLD2_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, h_addr, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, h_addr, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, h_addr, + changed = br_multicast_isexc(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, h_addr, + changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, h_addr, + changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, h_addr, + changed = br_multicast_block(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; } if (changed) - br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); } return err; } #endif -static bool br_ip4_multicast_select_querier(struct net_bridge *br, - struct net_bridge_port *port, +static bool br_ip4_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 saddr) { - if (!timer_pending(&br->ip4_own_query.timer) && - !timer_pending(&br->ip4_other_query.timer)) + struct net_bridge_port *port = pmctx ? 
pmctx->port : NULL; + + if (!timer_pending(&brmctx->ip4_own_query.timer) && + !timer_pending(&brmctx->ip4_other_query.timer)) goto update; - if (!br->ip4_querier.addr.src.ip4) + if (!brmctx->ip4_querier.addr.src.ip4) goto update; - if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4)) + if (ntohl(saddr) <= ntohl(brmctx->ip4_querier.addr.src.ip4)) goto update; return false; update: - br->ip4_querier.addr.src.ip4 = saddr; + brmctx->ip4_querier.addr.src.ip4 = saddr; /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(br->ip4_querier.port, port); + rcu_assign_pointer(brmctx->ip4_querier.port, port); return true; } #if IS_ENABLED(CONFIG_IPV6) -static bool br_ip6_multicast_select_querier(struct net_bridge *br, - struct net_bridge_port *port, +static bool br_ip6_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct in6_addr *saddr) { - if (!timer_pending(&br->ip6_own_query.timer) && - !timer_pending(&br->ip6_other_query.timer)) + struct net_bridge_port *port = pmctx ? pmctx->port : NULL; + + if (!timer_pending(&brmctx->ip6_own_query.timer) && + !timer_pending(&brmctx->ip6_other_query.timer)) goto update; - if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0) + if (ipv6_addr_cmp(saddr, &brmctx->ip6_querier.addr.src.ip6) <= 0) goto update; return false; update: - br->ip6_querier.addr.src.ip6 = *saddr; + brmctx->ip6_querier.addr.src.ip6 = *saddr; /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(br->ip6_querier.port, port); + rcu_assign_pointer(brmctx->ip6_querier.port, port); return true; } #endif static void -br_multicast_update_query_timer(struct net_bridge *br, +br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, struct bridge_mcast_other_query *query, unsigned long max_delay) { if (!timer_pending(&query->timer)) query->delay_time = jiffies + max_delay; - mod_timer(&query->timer, jiffies + br->multicast_querier_interval); + mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } static void br_port_mc_router_state_change(struct net_bridge_port *p, @@ -2709,19 +2904,26 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p, } static struct net_bridge_port * -br_multicast_rport_from_node(struct net_bridge *br, +br_multicast_rport_from_node(struct net_bridge_mcast *brmctx, struct hlist_head *mc_router_list, struct hlist_node *rlist) { + struct net_bridge_mcast_port *pmctx; + #if IS_ENABLED(CONFIG_IPV6) - if (mc_router_list == &br->ip6_mc_router_list) - return hlist_entry(rlist, struct net_bridge_port, ip6_rlist); + if (mc_router_list == &brmctx->ip6_mc_router_list) + pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, + ip6_rlist); + else #endif - return hlist_entry(rlist, struct net_bridge_port, ip4_rlist); + pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, + ip4_rlist); + + return pmctx->port; } static struct hlist_node * -br_multicast_get_rport_slot(struct net_bridge *br, +br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx, struct net_bridge_port *port, struct hlist_head *mc_router_list) @@ -2731,7 +2933,7 @@ br_multicast_get_rport_slot(struct net_bridge *br, struct hlist_node *rlist; hlist_for_each(rlist, mc_router_list) { - p = br_multicast_rport_from_node(br, mc_router_list, rlist); + p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist); if ((unsigned long)port >= (unsigned long)p) break; @@ -2742,14 +2944,14 @@ br_multicast_get_rport_slot(struct net_bridge *br, return slot; } -static bool 
br_multicast_no_router_otherpf(struct net_bridge_port *port, +static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx, struct hlist_node *rnode) { #if IS_ENABLED(CONFIG_IPV6) - if (rnode != &port->ip6_rlist) - return hlist_unhashed(&port->ip6_rlist); + if (rnode != &pmctx->ip6_rlist) + return hlist_unhashed(&pmctx->ip6_rlist); else - return hlist_unhashed(&port->ip4_rlist); + return hlist_unhashed(&pmctx->ip4_rlist); #else return true; #endif @@ -2759,8 +2961,8 @@ static bool br_multicast_no_router_otherpf(struct net_bridge_port *port, * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct hlist_node *rlist, struct hlist_head *mc_router_list) { @@ -2769,7 +2971,7 @@ static void br_multicast_add_router(struct net_bridge *br, if (!hlist_unhashed(rlist)) return; - slot = br_multicast_get_rport_slot(br, port, mc_router_list); + slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list); if (slot) hlist_add_behind_rcu(rlist, slot); @@ -2780,9 +2982,9 @@ static void br_multicast_add_router(struct net_bridge *br, * switched from no IPv4/IPv6 multicast router to a new * IPv4 or IPv6 multicast router. */ - if (br_multicast_no_router_otherpf(port, rlist)) { - br_rtr_notify(br->dev, port, RTM_NEWMDB); - br_port_mc_router_state_change(port, true); + if (br_multicast_no_router_otherpf(pmctx, rlist)) { + br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB); + br_port_mc_router_state_change(pmctx->port, true); } } @@ -2790,116 +2992,119 @@ static void br_multicast_add_router(struct net_bridge *br, * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_ip4_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { - br_multicast_add_router(br, port, &port->ip4_rlist, - &br->ip4_mc_router_list); + br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist, + &brmctx->ip4_mc_router_list); } /* Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_ip6_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - br_multicast_add_router(br, port, &port->ip6_rlist, - &br->ip6_mc_router_list); + br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist, + &brmctx->ip6_mc_router_list); #endif } -static void br_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct timer_list *timer, struct hlist_node *rlist, struct hlist_head *mc_router_list) { unsigned long now = jiffies; - if (!port) { - if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { - if (!br_ip4_multicast_is_router(br) && - !br_ip6_multicast_is_router(br)) - br_mc_router_state_change(br, true); - mod_timer(timer, now + br->multicast_querier_interval); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + return; + + if (!pmctx) { + if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { + if 
(!br_ip4_multicast_is_router(brmctx) && + !br_ip6_multicast_is_router(brmctx)) + br_mc_router_state_change(brmctx->br, true); + mod_timer(timer, now + brmctx->multicast_querier_interval); } return; } - if (port->multicast_router == MDB_RTR_TYPE_DISABLED || - port->multicast_router == MDB_RTR_TYPE_PERM) + if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + pmctx->multicast_router == MDB_RTR_TYPE_PERM) return; - br_multicast_add_router(br, port, rlist, mc_router_list); - mod_timer(timer, now + br->multicast_querier_interval); + br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list); + mod_timer(timer, now + brmctx->multicast_querier_interval); } -static void br_ip4_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { - struct timer_list *timer = &br->ip4_mc_router_timer; + struct timer_list *timer = &brmctx->ip4_mc_router_timer; struct hlist_node *rlist = NULL; - if (port) { - timer = &port->ip4_mc_router_timer; - rlist = &port->ip4_rlist; + if (pmctx) { + timer = &pmctx->ip4_mc_router_timer; + rlist = &pmctx->ip4_rlist; } - br_multicast_mark_router(br, port, timer, rlist, - &br->ip4_mc_router_list); + br_multicast_mark_router(brmctx, pmctx, timer, rlist, + &brmctx->ip4_mc_router_list); } -static void br_ip6_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - struct timer_list *timer = &br->ip6_mc_router_timer; + struct timer_list *timer = &brmctx->ip6_mc_router_timer; struct hlist_node *rlist = NULL; - if (port) { - timer = &port->ip6_mc_router_timer; - rlist = &port->ip6_rlist; + if (pmctx) { + timer = &pmctx->ip6_mc_router_timer; + rlist = &pmctx->ip6_rlist; } - br_multicast_mark_router(br, port, timer, rlist, - &br->ip6_mc_router_list); + br_multicast_mark_router(brmctx, pmctx, timer, rlist, + &brmctx->ip6_mc_router_list); #endif } static void -br_ip4_multicast_query_received(struct net_bridge *br, - struct net_bridge_port *port, +br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4)) + if (!br_ip4_multicast_select_querier(brmctx, pmctx, saddr->src.ip4)) return; - br_multicast_update_query_timer(br, query, max_delay); - br_ip4_multicast_mark_router(br, port); + br_multicast_update_query_timer(brmctx, query, max_delay); + br_ip4_multicast_mark_router(brmctx, pmctx); } #if IS_ENABLED(CONFIG_IPV6) static void -br_ip6_multicast_query_received(struct net_bridge *br, - struct net_bridge_port *port, +br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6)) + if (!br_ip6_multicast_select_querier(brmctx, pmctx, &saddr->src.ip6)) return; - br_multicast_update_query_timer(br, query, max_delay); - br_ip6_multicast_mark_router(br, port); + br_multicast_update_query_timer(brmctx, query, max_delay); + br_ip6_multicast_mark_router(brmctx, pmctx); } #endif -static void br_ip4_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_query(struct 
net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { @@ -2915,9 +3120,8 @@ static void br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; group = ih->group; @@ -2932,7 +3136,8 @@ static void br_ip4_multicast_query(struct net_bridge *br, } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs || - (br->multicast_igmp_version == 3 && group && ih3->suppress)) + (brmctx->multicast_igmp_version == 3 && group && + ih3->suppress)) goto out; max_delay = ih3->code ? @@ -2945,16 +3150,17 @@ static void br_ip4_multicast_query(struct net_bridge *br, saddr.proto = htons(ETH_P_IP); saddr.src.ip4 = iph->saddr; - br_ip4_multicast_query_received(br, port, &br->ip4_other_query, + br_ip4_multicast_query_received(brmctx, pmctx, + &brmctx->ip4_other_query, &saddr, max_delay); goto out; } - mp = br_mdb_ip4_get(br, group, vid); + mp = br_mdb_ip4_get(brmctx->br, group, vid); if (!mp) goto out; - max_delay *= br->multicast_last_member_count; + max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? @@ -2963,23 +3169,23 @@ static void br_ip4_multicast_query(struct net_bridge *br, mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && - (br->multicast_igmp_version == 2 || + (brmctx->multicast_igmp_version == 2 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { @@ -2997,9 +3203,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, bool is_general_query; int err = 0; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; if (transport_len == sizeof(*mld)) { @@ -3019,7 +3224,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - if (br->multicast_mld_version == 2 && + if (brmctx->multicast_mld_version == 2 && !ipv6_addr_any(&mld2q->mld2q_mca) && mld2q->mld2q_suppress) goto out; @@ -3033,18 +3238,19 @@ static int br_ip6_multicast_query(struct net_bridge *br, saddr.proto = htons(ETH_P_IPV6); saddr.src.ip6 = ipv6_hdr(skb)->saddr; - br_ip6_multicast_query_received(br, port, &br->ip6_other_query, + br_ip6_multicast_query_received(brmctx, pmctx, + &brmctx->ip6_other_query, &saddr, max_delay); goto out; } else if (!group) { goto out; } - mp = br_mdb_ip6_get(br, group, vid); + mp = br_mdb_ip6_get(brmctx->br, group, vid); if (!mp) goto out; - max_delay *= br->multicast_last_member_count; + max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? 
time_after(mp->timer.expires, now + max_delay) : @@ -3052,25 +3258,25 @@ static int br_ip6_multicast_query(struct net_bridge *br, mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && - (br->multicast_mld_version == 1 || + (brmctx->multicast_mld_version == 1 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); return err; } #endif static void -br_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +br_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, struct bridge_mcast_other_query *other_query, struct bridge_mcast_own_query *own_query, @@ -3081,22 +3287,21 @@ br_multicast_leave_group(struct net_bridge *br, unsigned long now; unsigned long time; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; - mp = br_mdb_ip_get(br, group); + mp = br_mdb_ip_get(brmctx->br, group); if (!mp) goto out; - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { - if (!br_port_group_equal(p, port, src)) + if (!br_port_group_equal(p, pmctx->port, src)) continue; if (p->flags & MDB_PG_FLAGS_PERMANENT) @@ -3111,19 +3316,19 @@ br_multicast_leave_group(struct net_bridge *br, if (timer_pending(&other_query->timer)) goto out; - if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { - __br_multicast_send_query(br, port, NULL, NULL, &mp->addr, + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr, false, 0, NULL); - time = jiffies + br->multicast_last_member_count * - br->multicast_last_member_interval; + time = jiffies + brmctx->multicast_last_member_count * + brmctx->multicast_last_member_interval; mod_timer(&own_query->timer, time); - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (!br_port_group_equal(p, port, src)) + for (p = mlock_dereference(mp->ports, brmctx->br); + p != NULL && pmctx != NULL; + p = mlock_dereference(p->next, brmctx->br)) { + if (!br_port_group_equal(p, pmctx->port, src)) continue; if (!hlist_unhashed(&p->mglist) && @@ -3138,10 +3343,10 @@ br_multicast_leave_group(struct net_bridge *br, } now = jiffies; - time = now + br->multicast_last_member_count * - br->multicast_last_member_interval; + time = now + brmctx->multicast_last_member_count * + brmctx->multicast_last_member_interval; - if (!port) { + if (!pmctx) { if (mp->host_joined && (timer_pending(&mp->timer) ? 
time_after(mp->timer.expires, time) : @@ -3152,10 +3357,10 @@ br_multicast_leave_group(struct net_bridge *br, goto out; } - for (p = mlock_dereference(mp->ports, br); + for (p = mlock_dereference(mp->ports, brmctx->br); p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->key.port != port) + p = mlock_dereference(p->next, brmctx->br)) { + if (p->key.port != pmctx->port) continue; if (!hlist_unhashed(&p->mglist) && @@ -3168,11 +3373,11 @@ br_multicast_leave_group(struct net_bridge *br, break; } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } -static void br_ip4_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src) @@ -3183,20 +3388,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return; - own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + br_multicast_leave_group(brmctx, pmctx, &br_group, + &brmctx->ip4_other_query, own_query, src); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src) @@ -3207,14 +3413,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, if (ipv6_addr_is_ll_all_nodes(group)) return; - own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + own_query = pmctx ? 
&pmctx->ip6_own_query : &brmctx->ip6_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + br_multicast_leave_group(brmctx, pmctx, &br_group, + &brmctx->ip6_other_query, own_query, src); } #endif @@ -3252,8 +3459,8 @@ static void br_multicast_err_count(const struct net_bridge *br, u64_stats_update_end(&pstats->syncp); } -static void br_multicast_pim(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_pim(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct sk_buff *skb) { unsigned int offset = skb_transport_offset(skb); @@ -3264,31 +3471,32 @@ static void br_multicast_pim(struct net_bridge *br, pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) return; - spin_lock(&br->multicast_lock); - br_ip4_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip4_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); } -static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (ip_hdr(skb)->protocol != IPPROTO_IGMP || igmp_hdr(skb)->type != IGMP_MRDISC_ADV) return -ENOMSG; - spin_lock(&br->multicast_lock); - br_ip4_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip4_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); return 0; } -static int br_multicast_ipv4_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge_port *p = pmctx ? 
pmctx->port : NULL; const unsigned char *src; struct igmphdr *ih; int err; @@ -3300,14 +3508,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { if (ip_hdr(skb)->protocol == IPPROTO_PIM) - br_multicast_pim(br, port, skb); + br_multicast_pim(brmctx, pmctx, skb); } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { - br_ip4_multicast_mrd_rcv(br, port, skb); + br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb); } return 0; } else if (err < 0) { - br_multicast_err_count(br, port, skb->protocol); + br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } @@ -3319,44 +3527,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group, vid, src, - true); + err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid, + src, true); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb, vid); + err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - br_ip4_multicast_query(br, port, skb, vid); + br_ip4_multicast_query(brmctx, pmctx, skb, vid); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group, vid, src); + br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src); break; } - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_mrd_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) return; - spin_lock(&br->multicast_lock); - br_ip6_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip6_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); } -static int br_multicast_ipv6_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge_port *p = pmctx ? 
pmctx->port : NULL; const unsigned char *src; struct mld_msg *mld; int err; @@ -3368,11 +3577,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; if (err == -ENODATA && ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) - br_ip6_multicast_mrd_rcv(br, port, skb); + br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb); return 0; } else if (err < 0) { - br_multicast_err_count(br, port, skb->protocol); + br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } @@ -3383,29 +3592,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, case ICMPV6_MGM_REPORT: src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, - src, true); + err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca, + vid, src, true); break; case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb, vid); + err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb, vid); + err = br_ip6_multicast_query(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_REDUCTION: src = eth_hdr(skb)->h_source; - br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src); + br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid, + src); break; } - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #endif -int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, +int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { int ret = 0; @@ -3413,16 +3625,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED)) return 0; + if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) { + const struct net_bridge_vlan *masterv; + + /* the vlan has the master flag set only when transmitting + * through the bridge device + */ + if (br_vlan_is_master(vlan)) { + masterv = vlan; + *brmctx = &vlan->br_mcast_ctx; + *pmctx = NULL; + } else { + masterv = vlan->brvlan; + *brmctx = &vlan->brvlan->br_mcast_ctx; + *pmctx = &vlan->port_mcast_ctx; + } + + if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) + return 0; + } + switch (skb->protocol) { case htons(ETH_P_IP): - ret = br_multicast_ipv4_rcv(br, port, skb, vid); + ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid); break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - ret = br_multicast_ipv6_rcv(br, port, skb, vid); + ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid); break; #endif } @@ -3430,32 +3662,40 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, return ret; } -static void br_multicast_query_expired(struct net_bridge *br, +static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query, struct bridge_mcast_querier *querier) { - spin_lock(&br->multicast_lock); - if (query->startup_sent < br->multicast_startup_query_count) + spin_lock(&brmctx->br->multicast_lock); + if (br_multicast_ctx_vlan_disabled(brmctx)) + goto out; + + if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; 
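/* Note on the steps above/below: the vlan-disabled check keeps a
 * per-vlan context's own-query timer inert while snooping is off for
 * that vlan, and clearing querier->port records "no port" -- the bridge
 * itself -- as the active querier before the query is sent.
 */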
RCU_INIT_POINTER(querier->port, NULL); - br_multicast_send_query(br, NULL, query); - spin_unlock(&br->multicast_lock); + br_multicast_send_query(brmctx, NULL, query); +out: + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_own_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_own_query.timer); - br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); + br_multicast_query_expired(brmctx, &brmctx->ip4_own_query, + &brmctx->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_own_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_own_query.timer); - br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); + br_multicast_query_expired(brmctx, &brmctx->ip6_own_query, + &brmctx->ip6_querier); } #endif @@ -3472,47 +3712,63 @@ static void br_multicast_gc_work(struct work_struct *work) br_multicast_gc(&deleted_head); } -void br_multicast_init(struct net_bridge *br) +void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast *brmctx) { - br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; - - br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - br->multicast_last_member_count = 2; - br->multicast_startup_query_count = 2; + brmctx->br = br; + brmctx->vlan = vlan; + brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + brmctx->multicast_last_member_count = 2; + brmctx->multicast_startup_query_count = 2; - br->multicast_last_member_interval = HZ; - br->multicast_query_response_interval = 10 * HZ; - br->multicast_startup_query_interval = 125 * HZ / 4; - br->multicast_query_interval = 125 * HZ; - br->multicast_querier_interval = 255 * HZ; - br->multicast_membership_interval = 260 * HZ; + brmctx->multicast_last_member_interval = HZ; + brmctx->multicast_query_response_interval = 10 * HZ; + brmctx->multicast_startup_query_interval = 125 * HZ / 4; + brmctx->multicast_query_interval = 125 * HZ; + brmctx->multicast_querier_interval = 255 * HZ; + brmctx->multicast_membership_interval = 260 * HZ; - br->ip4_other_query.delay_time = 0; - br->ip4_querier.port = NULL; - br->multicast_igmp_version = 2; + brmctx->ip4_other_query.delay_time = 0; + brmctx->ip4_querier.port = NULL; + brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) - br->multicast_mld_version = 1; - br->ip6_other_query.delay_time = 0; - br->ip6_querier.port = NULL; + brmctx->multicast_mld_version = 1; + brmctx->ip6_other_query.delay_time = 0; + brmctx->ip6_querier.port = NULL; #endif - br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); - spin_lock_init(&br->multicast_lock); - timer_setup(&br->ip4_mc_router_timer, + timer_setup(&brmctx->ip4_mc_router_timer, br_ip4_multicast_local_router_expired, 0); - timer_setup(&br->ip4_other_query.timer, + timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); - timer_setup(&br->ip4_own_query.timer, + timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - timer_setup(&br->ip6_mc_router_timer, + timer_setup(&brmctx->ip6_mc_router_timer, br_ip6_multicast_local_router_expired, 0); - timer_setup(&br->ip6_other_query.timer, + timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); - timer_setup(&br->ip6_own_query.timer, + 
timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif +} + +void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) +{ + __br_multicast_stop(brmctx); +} + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; + + br_multicast_ctx_init(br, NULL, &br->multicast_ctx); + + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); + br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + + spin_lock_init(&br->multicast_lock); INIT_HLIST_HEAD(&br->mdb_list); INIT_HLIST_HEAD(&br->mcast_gc_list); INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work); @@ -3580,8 +3836,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br) br_ip6_multicast_leave_snoopers(br); } -static void __br_multicast_open(struct net_bridge *br, - struct bridge_mcast_own_query *query) +static void __br_multicast_open_query(struct net_bridge *br, + struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -3591,26 +3847,191 @@ static void __br_multicast_open(struct net_bridge *br, mod_timer(&query->timer, jiffies); } -void br_multicast_open(struct net_bridge *br) +static void __br_multicast_open(struct net_bridge_mcast *brmctx) { - __br_multicast_open(br, &br->ip4_own_query); + __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - __br_multicast_open(br, &br->ip6_own_query); + __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query); #endif } -void br_multicast_stop(struct net_bridge *br) +void br_multicast_open(struct net_bridge *br) +{ + ASSERT_RTNL(); + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast *brmctx; + + brmctx = &vlan->br_mcast_ctx; + if (br_vlan_is_brentry(vlan) && + !br_multicast_ctx_vlan_disabled(brmctx)) + __br_multicast_open(&vlan->br_mcast_ctx); + } + } + } + + __br_multicast_open(&br->multicast_ctx); +} + +static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { - del_timer_sync(&br->ip4_mc_router_timer); - del_timer_sync(&br->ip4_other_query.timer); - del_timer_sync(&br->ip4_own_query.timer); + del_timer_sync(&brmctx->ip4_mc_router_timer); + del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&br->ip6_mc_router_timer); - del_timer_sync(&br->ip6_other_query.timer); - del_timer_sync(&br->ip6_own_query.timer); + del_timer_sync(&brmctx->ip6_mc_router_timer); + del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_own_query.timer); #endif } +void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) +{ + struct net_bridge *br; + + /* it's okay to check for the flag without the multicast lock because it + * can only change under RTNL -> multicast_lock, we need the latter to + * sync with timers and packets + */ + if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) + return; + + if (br_vlan_is_master(vlan)) { + br = vlan->br; + + if (!br_vlan_is_brentry(vlan) || + (on && + br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx))) + return; + + spin_lock_bh(&br->multicast_lock); + vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; + spin_unlock_bh(&br->multicast_lock); + + if (on) + __br_multicast_open(&vlan->br_mcast_ctx); + else + __br_multicast_stop(&vlan->br_mcast_ctx); + } else { + struct net_bridge_mcast *brmctx; + + brmctx = 
br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx); + if (on && br_multicast_ctx_vlan_global_disabled(brmctx)) + return; + + br = vlan->port->br; + spin_lock_bh(&br->multicast_lock); + vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; + if (on) + __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx); + else + __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx); + spin_unlock_bh(&br->multicast_lock); + } +} + +void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) +{ + struct net_bridge_port *p; + + if (WARN_ON_ONCE(!br_vlan_is_master(vlan))) + return; + + list_for_each_entry(p, &vlan->br->port_list, list) { + struct net_bridge_vlan *vport; + + vport = br_vlan_find(nbp_vlan_group(p), vlan->vid); + if (!vport) + continue; + br_multicast_toggle_one_vlan(vport, on); + } +} + +int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + struct net_bridge_port *p; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on) + return 0; + + if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled"); + return -EINVAL; + } + + vg = br_vlan_group(br); + if (!vg) + return 0; + + br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on); + + /* disable/enable non-vlan mcast contexts based on vlan snooping */ + if (on) + __br_multicast_stop(&br->multicast_ctx); + else + __br_multicast_open(&br->multicast_ctx); + list_for_each_entry(p, &br->port_list, list) { + if (on) + br_multicast_disable_port(p); + else + br_multicast_enable_port(p); + } + + list_for_each_entry(vlan, &vg->vlan_list, vlist) + br_multicast_toggle_vlan(vlan, on); + + return 0; +} + +bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on) +{ + ASSERT_RTNL(); + + /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and + * requires only RTNL to change + */ + if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) + return false; + + vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED; + br_multicast_toggle_vlan(vlan, on); + + return true; +} + +void br_multicast_stop(struct net_bridge *br) +{ + ASSERT_RTNL(); + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast *brmctx; + + brmctx = &vlan->br_mcast_ctx; + if (br_vlan_is_brentry(vlan) && + !br_multicast_ctx_vlan_disabled(brmctx)) + __br_multicast_stop(&vlan->br_mcast_ctx); + } + } + } + + __br_multicast_stop(&br->multicast_ctx); +} + void br_multicast_dev_del(struct net_bridge *br) { struct net_bridge_mdb_entry *mp; @@ -3623,6 +4044,7 @@ void br_multicast_dev_del(struct net_bridge *br) hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); + br_multicast_ctx_deinit(&br->multicast_ctx); br_multicast_gc(&deleted_head); cancel_work_sync(&br->mcast_gc_work); @@ -3631,6 +4053,7 @@ void br_multicast_dev_del(struct net_bridge *br) int br_multicast_set_router(struct net_bridge *br, unsigned long val) { + struct net_bridge_mcast *brmctx = &br->multicast_ctx; int err = -EINVAL; spin_lock_bh(&br->multicast_lock); @@ -3639,17 +4062,17 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); - 
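/* With the toggles above, exactly one family of contexts is active at any
 * time: the bridge/port-global ones while per-VLAN snooping is off, the
 * per-VLAN ones while it is on, switched under RTNL. Assuming an iproute2
 * recent enough to expose the new attribute, the toggle is driven by e.g.:
 *
 *   ip link set dev br0 type bridge vlan_filtering 1
 *   ip link set dev br0 type bridge mcast_vlan_snooping 1
 *
 * where the second command fails with the extack message above if vlan
 * filtering is still disabled.
 */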
del_timer(&br->ip4_mc_router_timer); + del_timer(&brmctx->ip4_mc_router_timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&br->ip6_mc_router_timer); + del_timer(&brmctx->ip6_mc_router_timer); #endif - br->multicast_router = val; + brmctx->multicast_router = val; err = 0; break; case MDB_RTR_TYPE_TEMP_QUERY: - if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) + if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) br_mc_router_state_change(br, false); - br->multicast_router = val; + brmctx->multicast_router = val; err = 0; break; } @@ -3660,7 +4083,7 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) } static void -br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted) +br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted) { if (!deleted) return; @@ -3668,37 +4091,38 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted) /* For backwards compatibility for now, only notify if there is * no multicast router anymore for both IPv4 and IPv6. */ - if (!hlist_unhashed(&p->ip4_rlist)) + if (!hlist_unhashed(&pmctx->ip4_rlist)) return; #if IS_ENABLED(CONFIG_IPV6) - if (!hlist_unhashed(&p->ip6_rlist)) + if (!hlist_unhashed(&pmctx->ip6_rlist)) return; #endif - br_rtr_notify(p->br->dev, p, RTM_DELMDB); - br_port_mc_router_state_change(p, false); + br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB); + br_port_mc_router_state_change(pmctx->port, false); /* don't allow timer refresh */ - if (p->multicast_router == MDB_RTR_TYPE_TEMP) - p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) + pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) { - struct net_bridge *br = p->br; + struct net_bridge_mcast *brmctx = &p->br->multicast_ctx; + struct net_bridge_mcast_port *pmctx = &p->multicast_ctx; unsigned long now = jiffies; int err = -EINVAL; bool del = false; - spin_lock(&br->multicast_lock); - if (p->multicast_router == val) { + spin_lock(&p->br->multicast_lock); + if (pmctx->multicast_router == val) { /* Refresh the temp router port timer */ - if (p->multicast_router == MDB_RTR_TYPE_TEMP) { - mod_timer(&p->ip4_mc_router_timer, - now + br->multicast_querier_interval); + if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) { + mod_timer(&pmctx->ip4_mc_router_timer, + now + brmctx->multicast_querier_interval); #if IS_ENABLED(CONFIG_IPV6) - mod_timer(&p->ip6_mc_router_timer, - now + br->multicast_querier_interval); + mod_timer(&pmctx->ip6_mc_router_timer, + now + brmctx->multicast_querier_interval); #endif } err = 0; @@ -3706,63 +4130,86 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) } switch (val) { case MDB_RTR_TYPE_DISABLED: - p->multicast_router = MDB_RTR_TYPE_DISABLED; - del |= br_ip4_multicast_rport_del(p); - del_timer(&p->ip4_mc_router_timer); - del |= br_ip6_multicast_rport_del(p); + pmctx->multicast_router = MDB_RTR_TYPE_DISABLED; + del |= br_ip4_multicast_rport_del(pmctx); + del_timer(&pmctx->ip4_mc_router_timer); + del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&p->ip6_mc_router_timer); + del_timer(&pmctx->ip6_mc_router_timer); #endif - br_multicast_rport_del_notify(p, del); + br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_TEMP_QUERY: - p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - del |= br_ip4_multicast_rport_del(p); - del |= br_ip6_multicast_rport_del(p); - br_multicast_rport_del_notify(p, del); 
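/* For orientation, the MDB_RTR_TYPE_* values handled by the two setters
 * here: DISABLED (0) never treats the bridge/port as a multicast router,
 * TEMP_QUERY (1, the default) relies on received queries arming the
 * per-context router timers, PERM (2) marks it as a router port
 * unconditionally, and TEMP (3, ports only) acts like PERM until the
 * timer refreshed above expires. Assuming iproute2 support, a port is
 * pinned with e.g.:
 *
 *   ip link set dev swp1 type bridge_slave mcast_router 2
 */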
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + del |= br_ip4_multicast_rport_del(pmctx); + del |= br_ip6_multicast_rport_del(pmctx); + br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_PERM: - p->multicast_router = MDB_RTR_TYPE_PERM; - del_timer(&p->ip4_mc_router_timer); - br_ip4_multicast_add_router(br, p); + pmctx->multicast_router = MDB_RTR_TYPE_PERM; + del_timer(&pmctx->ip4_mc_router_timer); + br_ip4_multicast_add_router(brmctx, pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&p->ip6_mc_router_timer); + del_timer(&pmctx->ip6_mc_router_timer); #endif - br_ip6_multicast_add_router(br, p); + br_ip6_multicast_add_router(brmctx, pmctx); break; case MDB_RTR_TYPE_TEMP: - p->multicast_router = MDB_RTR_TYPE_TEMP; - br_ip4_multicast_mark_router(br, p); - br_ip6_multicast_mark_router(br, p); + pmctx->multicast_router = MDB_RTR_TYPE_TEMP; + br_ip4_multicast_mark_router(brmctx, pmctx); + br_ip6_multicast_mark_router(brmctx, pmctx); break; default: goto unlock; } err = 0; unlock: - spin_unlock(&br->multicast_lock); + spin_unlock(&p->br->multicast_lock); return err; } -static void br_multicast_start_querier(struct net_bridge *br, +static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { struct net_bridge_port *port; - __br_multicast_open(br, query); + __br_multicast_open_query(brmctx->br, query); rcu_read_lock(); - list_for_each_entry_rcu(port, &br->port_list, list) { - if (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING) + list_for_each_entry_rcu(port, &brmctx->br->port_list, list) { + struct bridge_mcast_own_query *ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query *ip6_own_query; +#endif + + if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx)) continue; - if (query == &br->ip4_own_query) - br_multicast_enable(&port->ip4_own_query); + if (br_multicast_ctx_is_vlan(brmctx)) { + struct net_bridge_vlan *vlan; + + vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid); + if (!vlan || + br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx)) + continue; + + ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query; +#endif + } else { + ip4_own_query = &port->multicast_ctx.ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + ip6_own_query = &port->multicast_ctx.ip6_own_query; +#endif + } + + if (query == &brmctx->ip4_own_query) + br_multicast_enable(ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) else - br_multicast_enable(&port->ip6_own_query); + br_multicast_enable(ip6_own_query); #endif } rcu_read_unlock(); @@ -3796,7 +4243,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_multicast_open(br); list_for_each_entry(port, &br->port_list, list) - __br_multicast_enable_port(port); + __br_multicast_enable_port_ctx(&port->multicast_ctx); change_snoopers = true; @@ -3839,7 +4286,7 @@ bool br_multicast_router(const struct net_device *dev) bool is_router; spin_lock_bh(&br->multicast_lock); - is_router = br_multicast_is_router(br, NULL); + is_router = br_multicast_is_router(&br->multicast_ctx, NULL); spin_unlock_bh(&br->multicast_lock); return is_router; } @@ -3847,6 +4294,7 @@ EXPORT_SYMBOL_GPL(br_multicast_router); int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + struct net_bridge_mcast *brmctx = &br->multicast_ctx; unsigned long max_delay; val = !!val; @@ -3859,18 +4307,18 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) if 
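/* Sketch of the per-port own-query selection performed in
 * br_multicast_start_querier() above (hypothetical helper name, same
 * logic as the loop body; the IPv6 branch assumes CONFIG_IPV6):
 */
static struct bridge_mcast_own_query *
br_mcast_own_query_for_port(struct net_bridge_mcast *brmctx,
			    struct net_bridge_port *port, bool ipv6)
{
	struct net_bridge_mcast_port *pmctx = &port->multicast_ctx;

	if (br_multicast_ctx_is_vlan(brmctx)) {
		struct net_bridge_vlan *vlan;

		vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid);
		if (!vlan ||
		    br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx))
			return NULL;	/* no matching, running port/VLAN ctx */
		pmctx = &vlan->port_mcast_ctx;
	}

	return ipv6 ? &pmctx->ip6_own_query : &pmctx->ip4_own_query;
}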
(!val) goto unlock; - max_delay = br->multicast_query_response_interval; + max_delay = brmctx->multicast_query_response_interval; - if (!timer_pending(&br->ip4_other_query.timer)) - br->ip4_other_query.delay_time = jiffies + max_delay; + if (!timer_pending(&brmctx->ip4_other_query.timer)) + brmctx->ip4_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip4_own_query); + br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - if (!timer_pending(&br->ip6_other_query.timer)) - br->ip6_other_query.delay_time = jiffies + max_delay; + if (!timer_pending(&brmctx->ip6_other_query.timer)) + brmctx->ip6_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip6_own_query); + br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif unlock: @@ -3891,7 +4339,7 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) } spin_lock_bh(&br->multicast_lock); - br->multicast_igmp_version = val; + br->multicast_ctx.multicast_igmp_version = val; spin_unlock_bh(&br->multicast_lock); return 0; @@ -3910,7 +4358,7 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) } spin_lock_bh(&br->multicast_lock); - br->multicast_mld_version = val; + br->multicast_ctx.multicast_mld_version = val; spin_unlock_bh(&br->multicast_lock); return 0; @@ -4003,7 +4451,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) memset(ð, 0, sizeof(eth)); eth.h_proto = htons(proto); - ret = br_multicast_querier_exists(br, ð, NULL); + ret = br_multicast_querier_exists(&br->multicast_ctx, ð, NULL); unlock: rcu_read_unlock(); @@ -4022,6 +4470,7 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); */ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) { + struct net_bridge_mcast *brmctx; struct net_bridge *br; struct net_bridge_port *port; bool ret = false; @@ -4035,17 +4484,18 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) goto unlock; br = port->br; + brmctx = &br->multicast_ctx; switch (proto) { case ETH_P_IP: - if (!timer_pending(&br->ip4_other_query.timer) || - rcu_dereference(br->ip4_querier.port) == port) + if (!timer_pending(&brmctx->ip4_other_query.timer) || + rcu_dereference(brmctx->ip4_querier.port) == port) goto unlock; break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: - if (!timer_pending(&br->ip6_other_query.timer) || - rcu_dereference(br->ip6_querier.port) == port) + if (!timer_pending(&brmctx->ip6_other_query.timer) || + rcu_dereference(brmctx->ip6_querier.port) == port) goto unlock; break; #endif @@ -4071,7 +4521,9 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); */ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) { - struct net_bridge_port *port, *p; + struct net_bridge_mcast_port *pmctx; + struct net_bridge_mcast *brmctx; + struct net_bridge_port *port; bool ret = false; rcu_read_lock(); @@ -4079,11 +4531,12 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) if (!port) goto unlock; + brmctx = &port->br->multicast_ctx; switch (proto) { case ETH_P_IP: - hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list, + hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, ip4_rlist) { - if (p == port) + if (pmctx->port == port) continue; ret = true; @@ -4092,9 +4545,9 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: - hlist_for_each_entry_rcu(p, 
&port->br->ip6_mc_router_list, + hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, ip6_rlist) { - if (p == port) + if (pmctx->port == port) continue; ret = true; @@ -4186,7 +4639,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, u64_stats_update_end(&pstats->syncp); } -void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, +void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir) { struct bridge_mcast_stats __percpu *stats; diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 13290a749d09..f91c071d1608 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -33,7 +33,8 @@ static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr); -static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, +static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, int filter_mode, @@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, } } -static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, +static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, int filter_mode) { @@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr); break; case MCAST_EXCLUDE: - br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr, - MCAST_EXCLUDE, + br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr, + h_addr, MCAST_EXCLUDE, true); break; } } -static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, +static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, int filter_mode, @@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, if (!set_h) goto fail_set_entry; - mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br)); - mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx)); + mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx)); return; @@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, } /* create new set entries from reports */ -static void __eht_create_set_entries(struct net_bridge_port_group *pg, +static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg, memset(&eht_src_addr, 0, sizeof(eht_src_addr)); for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); - br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, - filter_mode, + br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr, + h_addr, filter_mode, false); } } @@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg, return changed; } -static bool 
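/* The EHT set/host timers above now age on the owning context's group
 * membership interval; per br_multicast_gmi() further below that is
 * 2 * multicast_query_interval + multicast_query_response_interval (the
 * RFC default QRV of 2), so a per-VLAN context with tuned intervals ages
 * its EHT state independently of the bridge-global schedule.
 */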
br_multicast_eht_allow(struct net_bridge_port_group *pg, +static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { case MCAST_INCLUDE: - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, - MCAST_INCLUDE); + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, + addr_size, MCAST_INCLUDE); break; case MCAST_EXCLUDE: changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs, @@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_eht_block(struct net_bridge_port_group *pg, +static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg, addr_size); break; case MCAST_EXCLUDE: - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE); break; } @@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg, } /* flush_entries is true when changing mode */ -static bool __eht_inc_exc(struct net_bridge_port_group *pg, +static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, /* if we're changing mode del host and its entries */ if (flush_entries) br_multicast_del_eht_host(pg, h_addr); - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size, filter_mode); /* we can be missing sets only if we've deleted some entries */ if (flush_entries) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_eht_set *eht_set; struct net_bridge_group_src *src_ent; struct hlist_node *tmp; @@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, &eht_src_addr); if (!eht_set) continue; - mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br)); + mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx)); } } return changed; } -static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, +static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, { bool changed; - changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_INCLUDE, to_report); - br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE); + br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE); return changed; } -static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, +static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, { bool changed; - changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + changed = 
__eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE, to_report); - br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE); + br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE); return changed; } -static bool __eht_ip4_handle(struct net_bridge_port_group *pg, +static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg, switch (grec_type) { case IGMPV3_ALLOW_NEW_SOURCES: - br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32)); + br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs, + sizeof(__be32)); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32)); break; case IGMPV3_CHANGE_TO_INCLUDE: to_report = true; fallthrough; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32), to_report); break; case IGMPV3_CHANGE_TO_EXCLUDE: to_report = true; fallthrough; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32), to_report); break; } @@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg, } #if IS_ENABLED(CONFIG_IPV6) -static bool __eht_ip6_handle(struct net_bridge_port_group *pg, +static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, switch (grec_type) { case MLD2_ALLOW_NEW_SOURCES: - br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, + br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr)); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr)); break; case MLD2_CHANGE_TO_INCLUDE: to_report = true; fallthrough; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr), to_report); break; @@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, to_report = true; fallthrough; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr), to_report); break; @@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, #endif /* true means an entry was deleted */ -bool br_multicast_eht_handle(struct net_bridge_port_group *pg, +bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, @@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg, memset(&eht_host_addr, 0, sizeof(eht_host_addr)); memcpy(&eht_host_addr, h_addr, addr_size); if (addr_size == sizeof(__be32)) - changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs, - grec_type); + changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs, + nsrcs, grec_type); #if IS_ENABLED(CONFIG_IPV6) else - changed = 
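/* For orientation: __eht_ip4_handle()/__eht_ip6_handle() map the IGMPv3
 * and MLDv2 record types onto four EHT operations -- ALLOW_NEW_SOURCES ->
 * eht_allow, BLOCK_OLD_SOURCES -> eht_block, CHANGE_TO/MODE_IS_INCLUDE ->
 * eht_inc, CHANGE_TO/MODE_IS_EXCLUDE -> eht_exc -- with the CHANGE_TO_*
 * forms additionally flagged via to_report. The brmctx threaded through
 * all of them is used only for arming timers (gmi/lmqt); the set
 * membership logic itself is unchanged.
 */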
__eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs, - grec_type); + changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs, + nsrcs, grec_type); #endif out: diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8642e56059fb..616a1b6dec3c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, - p->multicast_router) || + p->multicast_ctx.multicast_router) || nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, p->multicast_eht_hosts_limit) || nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, @@ -1324,49 +1324,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) { u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]); - br->multicast_last_member_count = val; + br->multicast_ctx.multicast_last_member_count = val; } if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) { u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]); - br->multicast_startup_query_count = val; + br->multicast_ctx.multicast_startup_query_count = val; } if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]); - br->multicast_last_member_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]); - br->multicast_membership_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERIER_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]); - br->multicast_querier_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]); - br->multicast_query_response_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_startup_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { @@ -1566,7 +1566,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) || + if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, + br->multicast_ctx.multicast_router) || nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, br_opt_get(br, BROPT_MULTICAST_ENABLED)) || nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, @@ -1578,38 +1579,38 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, - br->multicast_last_member_count) || + br->multicast_ctx.multicast_last_member_count) || nla_put_u32(skb, 
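/* The br_changelink()/br_fill_info() edits above are purely mechanical:
 * every IFLA_BR_MCAST_* attribute keeps its UAPI format but now reads and
 * writes br->multicast_ctx, i.e. the same fields that a per-VLAN context
 * instantiates. A quick round trip (values hypothetical, intervals in
 * clock_t/USER_HZ units as before):
 *
 *   ip link set dev br0 type bridge mcast_query_interval 12500
 *   ip -d link show dev br0   # should report mcast_query_interval 12500
 */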
IFLA_BR_MCAST_STARTUP_QUERY_CNT, - br->multicast_startup_query_count) || + br->multicast_ctx.multicast_startup_query_count) || nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, - br->multicast_igmp_version)) + br->multicast_ctx.multicast_igmp_version)) return -EMSGSIZE; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, - br->multicast_mld_version)) + br->multicast_ctx.multicast_mld_version)) return -EMSGSIZE; #endif - clockval = jiffies_to_clock_t(br->multicast_last_member_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_membership_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_querier_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_query_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_query_response_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aa64d8d63ca3..51991f1b3e5a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,8 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 +#define BR_HWDOM_MAX BITS_PER_LONG + #define BR_VERSION "2.3" /* Control of forwarding link local multicast */ @@ -89,6 +91,59 @@ struct bridge_mcast_stats { }; #endif +/* net_bridge_mcast_port must be always defined due to forwarding stubs */ +struct net_bridge_mcast_port { +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct net_bridge_port *port; + struct net_bridge_vlan *vlan; + + struct bridge_mcast_own_query ip4_own_query; + struct timer_list ip4_mc_router_timer; + struct hlist_node ip4_rlist; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query ip6_own_query; + struct timer_list ip6_mc_router_timer; + struct hlist_node ip6_rlist; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + unsigned char multicast_router; +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ +}; + +/* net_bridge_mcast must be always defined due to forwarding stubs */ +struct net_bridge_mcast { +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct net_bridge *br; + struct net_bridge_vlan *vlan; + + u32 multicast_last_member_count; + u32 multicast_startup_query_count; + + u8 multicast_igmp_version; + u8 multicast_router; +#if IS_ENABLED(CONFIG_IPV6) + u8 multicast_mld_version; +#endif + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + 
unsigned long multicast_startup_query_interval; + struct hlist_head ip4_mc_router_list; + struct timer_list ip4_mc_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; + struct bridge_mcast_querier ip4_querier; +#if IS_ENABLED(CONFIG_IPV6) + struct hlist_head ip6_mc_router_list; + struct timer_list ip6_mc_router_timer; + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; + struct bridge_mcast_querier ip6_querier; +#endif /* IS_ENABLED(CONFIG_IPV6) */ +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ +}; + struct br_tunnel_info { __be64 tunnel_id; struct metadata_dst __rcu *tunnel_dst; @@ -98,6 +153,8 @@ struct br_tunnel_info { enum { BR_VLFLAG_PER_PORT_STATS = BIT(0), BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), + BR_VLFLAG_MCAST_ENABLED = BIT(2), + BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3), }; /** @@ -114,6 +171,9 @@ enum { * @refcnt: if MASTER flag set, this is bumped for each port referencing it * @brvlan: if MASTER flag unset, this points to the global per-VLAN context * for this VLAN entry + * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context + * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast + * context * @vlist: sorted list of VLAN entries * @rcu: used for entry destruction * @@ -141,6 +201,11 @@ struct net_bridge_vlan { struct br_tunnel_info tinfo; + union { + struct net_bridge_mcast br_mcast_ctx; + struct net_bridge_mcast_port port_mcast_ctx; + }; + struct list_head vlist; struct rcu_head rcu; @@ -305,19 +370,13 @@ struct net_bridge_port { struct kobject kobj; struct rcu_head rcu; + struct net_bridge_mcast_port multicast_ctx; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - struct bridge_mcast_own_query ip4_own_query; - struct timer_list ip4_mc_router_timer; - struct hlist_node ip4_rlist; -#if IS_ENABLED(CONFIG_IPV6) - struct bridge_mcast_own_query ip6_own_query; - struct timer_list ip6_mc_router_timer; - struct hlist_node ip6_rlist; -#endif /* IS_ENABLED(CONFIG_IPV6) */ + struct bridge_mcast_stats __percpu *mcast_stats; + u32 multicast_eht_hosts_limit; u32 multicast_eht_hosts_cnt; - unsigned char multicast_router; - struct bridge_mcast_stats __percpu *mcast_stats; struct hlist_head mglist; #endif @@ -329,7 +388,12 @@ struct net_bridge_port { struct netpoll *np; #endif #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Identifier used to group ports that share the same switchdev + * hardware domain. 
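/* Note on the union added to struct net_bridge_vlan above: a master
 * (bridge-side) entry only ever uses br_mcast_ctx and a port-side entry
 * only port_mcast_ctx, so the two contexts can share storage, with the
 * existing MASTER flag discriminating the union exactly as it already
 * does for the refcnt/brvlan pair. br_multicast_port_ctx_get_global()
 * further below chases the brvlan pointer to get from a port context back
 * to its global counterpart.
 */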
+ */ + int hwdom; + int offload_count; + struct netdev_phys_item_id ppid; #endif u16 group_fwd_mask; u16 backup_redirected_cnt; @@ -376,6 +440,7 @@ enum net_bridge_opts { BROPT_VLAN_STATS_PER_PORT, BROPT_NO_LL_LEARN, BROPT_VLAN_BRIDGE_BINDING, + BROPT_MCAST_VLAN_SNOOPING_ENABLED, }; struct net_bridge { @@ -426,25 +491,14 @@ struct net_bridge { BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; + struct net_bridge_mcast multicast_ctx; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct bridge_mcast_stats __percpu *mcast_stats; u32 hash_max; - u32 multicast_last_member_count; - u32 multicast_startup_query_count; - - u8 multicast_igmp_version; - u8 multicast_router; -#if IS_ENABLED(CONFIG_IPV6) - u8 multicast_mld_version; -#endif spinlock_t multicast_lock; - unsigned long multicast_last_member_interval; - unsigned long multicast_membership_interval; - unsigned long multicast_querier_interval; - unsigned long multicast_query_interval; - unsigned long multicast_query_response_interval; - unsigned long multicast_startup_query_interval; struct rhashtable mdb_hash_tbl; struct rhashtable sg_port_tbl; @@ -452,19 +506,6 @@ struct net_bridge { struct hlist_head mcast_gc_list; struct hlist_head mdb_list; - struct hlist_head ip4_mc_router_list; - struct timer_list ip4_mc_router_timer; - struct bridge_mcast_other_query ip4_other_query; - struct bridge_mcast_own_query ip4_own_query; - struct bridge_mcast_querier ip4_querier; - struct bridge_mcast_stats __percpu *mcast_stats; -#if IS_ENABLED(CONFIG_IPV6) - struct hlist_head ip6_mc_router_list; - struct timer_list ip6_mc_router_timer; - struct bridge_mcast_other_query ip6_other_query; - struct bridge_mcast_own_query ip6_own_query; - struct bridge_mcast_querier ip6_querier; -#endif /* IS_ENABLED(CONFIG_IPV6) */ struct work_struct mcast_gc_work; #endif @@ -476,7 +517,12 @@ struct net_bridge { u32 auto_cnt; #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Counter used to make sure that hardware domains get unique + * identifiers in case a bridge spans multiple switchdev instances. + */ + int last_hwdom; + /* Bit mask of hardware domain numbers in use */ + unsigned long busy_hwdoms; #endif struct hlist_head fdb_list; @@ -506,7 +552,20 @@ struct br_input_skb_cb { #endif #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Set if TX data plane offloading is used towards at least one + * hardware domain. + */ + u8 tx_fwd_offload:1; + /* The switchdev hardware domain from which this packet was received. + * If skb->offload_fwd_mark was set, then this packet was already + * forwarded by hardware to the other ports in the source hardware + * domain, otherwise it wasn't. + */ + int src_hwdom; + /* Bit mask of hardware domains towards this packet has already been + * transmitted using the TX data plane offload. 
+ */ + unsigned long fwd_hwdoms; #endif }; @@ -718,6 +777,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool offloaded); +int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding, + struct notifier_block *nb); /* br_forward.c */ enum br_pkt_type { @@ -790,15 +851,18 @@ br_port_get_check_rtnl(const struct net_device *dev) } /* br_ioctl.c */ -int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, - void __user *arg); +int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, + void __user *data, int cmd); +int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, + struct ifreq *ifr, void __user *uarg); /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, +int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid); -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid); int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); @@ -810,8 +874,9 @@ void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); -void br_multicast_flood(struct net_bridge_mdb_entry *mdst, - struct sk_buff *skb, bool local_rcv, bool local_orig); +void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, + bool local_rcv, bool local_orig); int br_multicast_set_router(struct net_bridge *br, unsigned long val); int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); int br_multicast_toggle(struct net_bridge *br, unsigned long val, @@ -835,12 +900,13 @@ int br_mdb_hash_init(struct net_bridge *br); void br_mdb_hash_fini(struct net_bridge *br); void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type); -void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, +void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type); void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_port_group __rcu **pp); -void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, +void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); void br_multicast_uninit_stats(struct net_bridge *br); @@ -849,7 +915,8 @@ void br_multicast_get_stats(const struct net_bridge *br, struct br_mcast_stats *dest); void br_mdb_init(void); void br_mdb_uninit(void); -void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_host_join(const struct net_bridge_mcast *brmctx, + struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode); 
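Taken together, the three cb fields above carry the whole TX forwarding offload handshake implemented in the br_switchdev.c hunk below: src_hwdom is stamped on ingress, tx_fwd_offload is set once an egress port with BR_TX_FWD_OFFLOAD in a foreign hardware domain is met, and fwd_hwdoms accumulates the domains the hardware has already covered. A condensed sketch of the resulting egress filter (same logic as nbp_switchdev_allowed_egress() below, restated here with a hypothetical name for readability):

static bool example_allowed_egress(const struct net_bridge_port *p,
				   const struct sk_buff *skb)
{
	const struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);

	/* already transmitted into this hwdom via the TX data plane */
	if (test_bit(p->hwdom, &cb->fwd_hwdoms))
		return false;

	/* hardware flooded the source hwdom when it set offload_fwd_mark */
	return !skb->offload_fwd_mark || cb->src_hwdom != p->hwdom;
}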
@@ -859,6 +926,23 @@ struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); void br_multicast_del_group_src(struct net_bridge_group_src *src, bool fastleave); +void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast *brmctx); +void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx); +void br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx); +void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); +void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); +void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on); +int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack); +bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); + +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack); static inline bool br_group_is_l2(const struct br_ip *group) { @@ -869,52 +953,65 @@ static inline bool br_group_is_l2(const struct br_ip *group) rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) static inline struct hlist_node * -br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) { +br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx, + struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) - return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list)); + return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list)); #endif - return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list)); + return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list)); } static inline struct net_bridge_port * -br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) { +br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) +{ + struct net_bridge_mcast_port *mctx; + #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) - return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist); + mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, + ip6_rlist); + else #endif - return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist); + mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, + ip4_rlist); + + if (mctx) + return mctx->port; + else + return NULL; } -static inline bool br_ip4_multicast_is_router(struct net_bridge *br) +static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx) { - return timer_pending(&br->ip4_mc_router_timer); + return timer_pending(&brmctx->ip4_mc_router_timer); } -static inline bool br_ip6_multicast_is_router(struct net_bridge *br) +static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) - return timer_pending(&br->ip6_mc_router_timer); + return timer_pending(&brmctx->ip6_mc_router_timer); #else return false; #endif } static inline bool -br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) +br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { - switch (br->multicast_router) { + switch (brmctx->multicast_router) { case MDB_RTR_TYPE_PERM: return true; case MDB_RTR_TYPE_TEMP_QUERY: if (skb) { if (skb->protocol == htons(ETH_P_IP)) - return br_ip4_multicast_is_router(br); + return br_ip4_multicast_is_router(brmctx); 
else if (skb->protocol == htons(ETH_P_IPV6)) - return br_ip6_multicast_is_router(br); + return br_ip6_multicast_is_router(brmctx); } else { - return br_ip4_multicast_is_router(br) || - br_ip6_multicast_is_router(br); + return br_ip4_multicast_is_router(brmctx) || + br_ip6_multicast_is_router(brmctx); } fallthrough; default: @@ -923,14 +1020,14 @@ br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) } static inline bool -__br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier, - const bool is_ipv6) +__br_multicast_querier_exists(struct net_bridge_mcast *brmctx, + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { bool own_querier_enabled; - if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { - if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR)) + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR)) own_querier_enabled = false; else own_querier_enabled = true; @@ -942,18 +1039,18 @@ __br_multicast_querier_exists(struct net_bridge *br, (own_querier_enabled || timer_pending(&querier->timer)); } -static inline bool br_multicast_querier_exists(struct net_bridge *br, +static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, - &br->ip4_other_query, false); + return __br_multicast_querier_exists(brmctx, + &brmctx->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, - &br->ip6_other_query, true); + return __br_multicast_querier_exists(brmctx, + &brmctx->ip6_other_query, true); #endif default: return !!mdb && br_group_is_l2(&mdb->addr); @@ -974,15 +1071,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip) } } -static inline bool br_multicast_should_handle_mode(const struct net_bridge *br, - __be16 proto) +static inline bool +br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx, + __be16 proto) { switch (proto) { case htons(ETH_P_IP): - return !!(br->multicast_igmp_version == 3); + return !!(brmctx->multicast_igmp_version == 3); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return !!(br->multicast_mld_version == 2); + return !!(brmctx->multicast_mld_version == 2); #endif default: return false; @@ -994,28 +1092,90 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb) return BR_INPUT_SKB_CB(skb)->igmp; } -static inline unsigned long br_multicast_lmqt(const struct net_bridge *br) +static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx) { - return br->multicast_last_member_interval * - br->multicast_last_member_count; + return brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count; } -static inline unsigned long br_multicast_gmi(const struct net_bridge *br) +static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx) { /* use the RFC default of 2 for QRV */ - return 2 * br->multicast_query_interval + - br->multicast_query_response_interval; + return 2 * brmctx->multicast_query_interval + + brmctx->multicast_query_response_interval; +} + +static inline bool +br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx) +{ + return !!brmctx->vlan; +} + +static inline bool +br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx) +{ + return !!pmctx->vlan; +} + +static inline struct 
net_bridge_mcast * +br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) +{ + if (!br_multicast_port_ctx_is_vlan(pmctx)) + return &pmctx->port->br->multicast_ctx; + else + return &pmctx->vlan->brvlan->br_mcast_ctx; +} + +static inline bool +br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) +{ + return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && + br_multicast_ctx_is_vlan(brmctx) && + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); +} + +static inline bool +br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx) +{ + return br_multicast_ctx_is_vlan(brmctx) && + !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); +} + +static inline bool +br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx) +{ + return br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); +} + +static inline bool +br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx) +{ + return pmctx->port->state == BR_STATE_DISABLED || + (br_multicast_port_ctx_is_vlan(pmctx) && + (br_multicast_port_ctx_vlan_disabled(pmctx) || + pmctx->vlan->state == BR_STATE_DISABLED)); +} + +static inline bool +br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx) +{ + return br_multicast_port_ctx_state_disabled(pmctx) || + pmctx->port->state == BR_STATE_BLOCKING || + (br_multicast_port_ctx_is_vlan(pmctx) && + pmctx->vlan->state == BR_STATE_BLOCKING); } #else -static inline int br_multicast_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { return 0; } -static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { return NULL; @@ -1064,17 +1224,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br) static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { } -static inline bool br_multicast_is_router(struct net_bridge *br, +static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { return false; } -static inline bool br_multicast_querier_exists(struct net_bridge *br, +static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { @@ -1118,13 +1279,65 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; } + +static inline void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast *brmctx) +{ +} + +static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) +{ +} + +static inline void br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx) +{ +} + +static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) +{ +} + +static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, + bool on) +{ +} + +static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, + bool on) +{ +} + +static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br, + bool on, + struct netlink_ext_ack 
*extack) +{ + return -EOPNOTSUPP; +} + +static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, + bool on) +{ + return false; +} + +static inline int br_mdb_replay(struct net_device *br_dev, + struct net_device *dev, const void *ctx, + bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state); + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan); bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); @@ -1168,6 +1381,9 @@ void br_vlan_notify(const struct net_bridge *br, const struct net_bridge_port *p, u16 vid, u16 vid_range, int cmd); +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack); bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end); @@ -1236,8 +1452,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) static inline bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state) + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan) + { + *vlan = NULL; return true; } @@ -1410,6 +1629,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, { return true; } + +static inline int br_vlan_replay(struct net_device *br_dev, + struct net_device *dev, const void *ctx, + bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif /* br_vlan_options.c */ @@ -1424,6 +1651,14 @@ int br_vlan_process_options(const struct net_bridge *br, struct net_bridge_vlan *range_end, struct nlattr **tb, struct netlink_ext_ack *extack); +int br_vlan_rtm_process_global_options(struct net_device *dev, + const struct nlattr *attr, + int cmd, + struct netlink_ext_ack *extack); +bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *r_end); +bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, + const struct net_bridge_vlan *v_opts); /* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) @@ -1645,7 +1880,25 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; } /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV -int nbp_switchdev_mark_set(struct net_bridge_port *p); +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); + +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + +bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); + +void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); + +void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb); +void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb); void nbp_switchdev_frame_mark(const struct 
net_bridge_port *p, struct sk_buff *skb); bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, @@ -1659,15 +1912,50 @@ void br_switchdev_fdb_notify(struct net_bridge *br, int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, struct netlink_ext_ack *extack); int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); +void br_switchdev_init(struct net_bridge *br); static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { skb->offload_fwd_mark = 0; } #else -static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +static inline int +br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + +static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) +{ + return false; +} + +static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) +{ +} + +static inline void +nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline void +nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb) { - return 0; } static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, @@ -1710,6 +1998,11 @@ br_switchdev_fdb_notify(struct net_bridge *br, static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { } + +static inline void br_switchdev_init(struct net_bridge *br) +{ +} + #endif /* CONFIG_NET_SWITCHDEV */ /* br_arp_nd_proxy.c */ diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index f89049f4892c..adf82a05515a 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -51,7 +51,8 @@ struct net_bridge_group_eht_set { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); -bool br_multicast_eht_handle(struct net_bridge_port_group *pg, +bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index d3adee0f91f9..6bf518d78f02 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -8,50 +8,65 @@ #include "br_private.h" -static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) -{ - struct net_bridge_port *p; +static struct static_key_false br_switchdev_tx_fwd_offload; - /* dev is yet to be added to the port list. 
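The #else stubs above follow the usual kernel pattern for a CONFIG-gated subsystem: when CONFIG_NET_SWITCHDEV (or IGMP snooping, or VLAN filtering) is compiled out, every entry point becomes a static inline no-op or an -EOPNOTSUPP return, so call sites need no #ifdefs. A minimal userspace model of the idiom; FEATURE_X and feature_enable() are made-up names, not kernel symbols.

#include <stdio.h>

#ifdef FEATURE_X
static int feature_enable(int port)
{
	printf("enabling feature on port %d\n", port);
	return 0;
}
#else
/* The stub keeps call sites ifdef-free; a no-op body or an error
 * return mirrors the empty and -EOPNOTSUPP stubs above.
 */
static inline int feature_enable(int port)
{
	(void)port;
	return -1;	/* stands in for -EOPNOTSUPP */
}
#endif

int main(void)
{
	/* identical call site whether the feature is compiled in or not */
	return feature_enable(1) < 0 ? 0 : 0;
}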
*/ - list_for_each_entry(p, &br->port_list, list) { - if (netdev_port_same_parent_id(dev, p->dev)) - return p->offload_fwd_mark; - } +static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) + return false; - return ++br->offload_fwd_mark; + return (p->flags & BR_TX_FWD_OFFLOAD) && + (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom); } -int nbp_switchdev_mark_set(struct net_bridge_port *p) +bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { - struct netdev_phys_item_id ppid = { }; - int err; + if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) + return false; - ASSERT_RTNL(); + return BR_INPUT_SKB_CB(skb)->tx_fwd_offload; +} - err = dev_get_port_parent_id(p->dev, &ppid, true); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } +void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) +{ + skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb); +} - p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); +/* Mark the frame for TX forwarding offload if this egress port supports it */ +void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (nbp_switchdev_can_offload_tx_fwd(p, skb)) + BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true; +} - return 0; +/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms + * that the skb has been already forwarded to, to avoid further cloning to + * other ports in the same hwdom by making nbp_switchdev_allowed_egress() + * return false. + */ +void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (nbp_switchdev_can_offload_tx_fwd(p, skb)) + set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms); } void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb) { - if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) - BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; + if (p->hwdom) + BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom; } bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb) { - return !skb->offload_fwd_mark || - BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; + struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb); + + return !test_bit(p->hwdom, &cb->fwd_hwdoms) && + (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom); } /* Flags that can be offloaded to hardware */ @@ -112,7 +127,6 @@ br_switchdev_fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type) { const struct net_bridge_port *dst = READ_ONCE(fdb->dst); - struct net_device *dev = dst ? dst->dev : br->dev; struct switchdev_notifier_fdb_info info = { .addr = fdb->key.addr.addr, .vid = fdb->key.vlan_id, @@ -120,6 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br, .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), }; + struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev; switch (type) { case RTM_DELNEIGH: @@ -156,3 +171,182 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) return switchdev_port_obj_del(dev, &v.obj); } + +static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining) +{ + struct net_bridge *br = joining->br; + struct net_bridge_port *p; + int hwdom; + + /* joining is yet to be added to the port list. 
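nbp_switchdev_allowed_egress() above now keys off two pieces of per-skb state: the hardware domain the frame arrived on and a bitmask of domains already handled by TX forwarding offload. A self-contained C model of that decision; skb_cb and its field names are simplified stand-ins for struct br_input_skb_cb.

#include <stdbool.h>

struct skb_cb {
	int src_hwdom;			/* domain the frame entered on */
	unsigned long fwd_hwdoms;	/* domains already forwarded in hw */
	bool offload_fwd_mark;		/* hw already flooded the source domain */
};

static bool allowed_egress(const struct skb_cb *cb, int port_hwdom)
{
	if (cb->fwd_hwdoms & (1UL << port_hwdom))
		return false;	/* TX forwarding offload covered this domain */
	return !cb->offload_fwd_mark || cb->src_hwdom != port_hwdom;
}

int main(void)
{
	struct skb_cb cb = { .src_hwdom = 1, .offload_fwd_mark = true };

	return allowed_egress(&cb, 1);	/* 0: same domain, hw already flooded */
}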
*/ + list_for_each_entry(p, &br->port_list, list) { + if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) { + joining->hwdom = p->hwdom; + return 0; + } + } + + hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1); + if (hwdom >= BR_HWDOM_MAX) + return -EBUSY; + + set_bit(hwdom, &br->busy_hwdoms); + joining->hwdom = hwdom; + return 0; +} + +static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving) +{ + struct net_bridge *br = leaving->br; + struct net_bridge_port *p; + + /* leaving is no longer in the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (p->hwdom == leaving->hwdom) + return; + } + + clear_bit(leaving->hwdom, &br->busy_hwdoms); +} + +static int nbp_switchdev_add(struct net_bridge_port *p, + struct netdev_phys_item_id ppid, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + int err; + + if (p->offload_count) { + /* Prevent unsupported configurations such as a bridge port + * which is a bonding interface, and the member ports are from + * different hardware switches. + */ + if (!netdev_phys_item_id_same(&p->ppid, &ppid)) { + NL_SET_ERR_MSG_MOD(extack, + "Same bridge port cannot be offloaded by two physical switches"); + return -EBUSY; + } + + /* Tolerate drivers that call switchdev_bridge_port_offload() + * more than once for the same bridge port, such as when the + * bridge port is an offloaded bonding/team interface. + */ + p->offload_count++; + + return 0; + } + + p->ppid = ppid; + p->offload_count = 1; + + err = nbp_switchdev_hwdom_set(p); + if (err) + return err; + + if (tx_fwd_offload) { + p->flags |= BR_TX_FWD_OFFLOAD; + static_branch_inc(&br_switchdev_tx_fwd_offload); + } + + return 0; +} + +static void nbp_switchdev_del(struct net_bridge_port *p) +{ + if (WARN_ON(!p->offload_count)) + return; + + p->offload_count--; + + if (p->offload_count) + return; + + if (p->hwdom) + nbp_switchdev_hwdom_put(p); + + if (p->flags & BR_TX_FWD_OFFLOAD) { + p->flags &= ~BR_TX_FWD_OFFLOAD; + static_branch_dec(&br_switchdev_tx_fwd_offload); + } +} + +static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + struct netlink_ext_ack *extack) +{ + struct net_device *br_dev = p->br->dev; + struct net_device *dev = p->dev; + int err; + + err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_fdb_replay(br_dev, ctx, true, atomic_nb); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct net_device *br_dev = p->br->dev; + struct net_device *dev = p->dev; + + br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + + br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + + br_fdb_replay(br_dev, ctx, false, atomic_nb); +} + +/* Let the bridge know that this port is offloaded, so that it can assign a + * switchdev hardware domain to it. 
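nbp_switchdev_hwdom_set() allocates hardware domain IDs from a per-bridge bitmap, reusing the ID of any existing port behind the same parent device, and the matching put releases the bit only when the last port in the domain leaves. A userspace sketch of the allocate-or-reuse half; the port type and the HWDOM_MAX bound are illustrative.

#include <stdio.h>

#define HWDOM_MAX 32	/* stand-in for BR_HWDOM_MAX */

struct port { int ppid; int hwdom; };

static int hwdom_set(const struct port *ports, int n, struct port *joining,
		     unsigned long *busy)
{
	int d;

	/* reuse the domain of any port behind the same parent */
	for (d = 0; d < n; d++) {
		if (ports[d].ppid == joining->ppid) {
			joining->hwdom = ports[d].hwdom;
			return 0;
		}
	}
	/* else take the lowest free bit; 0 is reserved for "no domain" */
	for (d = 1; d < HWDOM_MAX; d++) {
		if (!(*busy & (1UL << d))) {
			*busy |= 1UL << d;
			joining->hwdom = d;
			return 0;
		}
	}
	return -1;	/* stands in for -EBUSY */
}

int main(void)
{
	struct port ports[] = { { .ppid = 7, .hwdom = 1 } };
	struct port joining = { .ppid = 7 };
	unsigned long busy = 1UL << 1;

	hwdom_set(ports, 1, &joining, &busy);
	printf("joined hwdom %d\n", joining.hwdom);	/* reuses domain 1 */
	return 0;
}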
+ */ +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct netdev_phys_item_id ppid; + int err; + + err = dev_get_port_parent_id(dev, &ppid, false); + if (err) + return err; + + err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack); + if (err) + return err; + + err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack); + if (err) + goto out_switchdev_del; + + return 0; + +out_switchdev_del: + nbp_switchdev_del(p); + + return err; +} + +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); + + nbp_switchdev_del(p); +} diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 381467b691d5..953d544663d5 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -384,7 +384,7 @@ static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%d\n", br->multicast_router); + return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router); } static int set_multicast_router(struct net_bridge *br, unsigned long val, @@ -514,7 +514,7 @@ static ssize_t multicast_igmp_version_show(struct device *d, { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_igmp_version); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version); } static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val, @@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_last_member_count); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count); } static int set_last_member_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_last_member_count = val; + br->multicast_ctx.multicast_last_member_count = val; return 0; } @@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_startup_query_count); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count); } static int set_startup_query_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_startup_query_count = val; + br->multicast_ctx.multicast_startup_query_count = val; return 0; } @@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show( { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_last_member_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval)); } static int set_last_member_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_last_member_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); return 0; } @@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show( { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_membership_interval)); + 
jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval)); } static int set_membership_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_membership_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); return 0; } @@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d, { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_querier_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval)); } static int set_querier_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_querier_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); return 0; } @@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d, { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_query_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval)); } static int set_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); return 0; } @@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show( struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", - jiffies_to_clock_t(br->multicast_query_response_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval)); } static int set_query_response_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_query_response_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); return 0; } @@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show( struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", - jiffies_to_clock_t(br->multicast_startup_query_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval)); } static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_startup_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); return 0; } @@ -751,7 +751,7 @@ static ssize_t multicast_mld_version_show(struct device *d, { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_mld_version); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version); } static int set_multicast_mld_version(struct net_bridge *br, unsigned long val, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 72e92376eef1..e9e3aedd3178 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -244,7 +244,7 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) { - return sprintf(buf, "%d\n", p->multicast_router); + return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router); } static int store_multicast_router(struct net_bridge_port *p, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index a08e9f193009..8cfd035bbaf9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -190,6 +190,8 @@ static void 
br_vlan_put_master(struct net_bridge_vlan *masterv) rhashtable_remove_fast(&vg->vlan_hash, &masterv->vnode, br_vlan_rht_params); __vlan_del_list(masterv); + br_multicast_toggle_one_vlan(masterv, false); + br_multicast_ctx_deinit(&masterv->br_mcast_ctx); call_rcu(&masterv->rcu, br_master_vlan_rcu_free); } } @@ -280,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } else { v->stats = masterv->stats; } + br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx); } else { err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack); if (err && err != -EOPNOTSUPP) goto out; + br_multicast_ctx_init(br, v, &v->br_mcast_ctx); + v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED; } /* Add the dev mac and count the vlan only if it's usable */ @@ -306,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, __vlan_add_list(v); __vlan_add_flags(v, flags); + br_multicast_toggle_one_vlan(v, true); if (p) nbp_vlan_set_vlan_dev_state(p, v->vid); @@ -374,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v) br_vlan_rht_params); __vlan_del_list(v); nbp_vlan_set_vlan_dev_state(p, v->vid); + br_multicast_toggle_one_vlan(v, false); + br_multicast_port_ctx_deinit(&v->port_mcast_ctx); call_rcu(&v->rcu, nbp_vlan_rcu_free); } @@ -457,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, u64_stats_update_end(&stats->syncp); } - if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + /* If the skb will be sent using forwarding offload, the assumption is + * that the switchdev will inject the packet into hardware together + * with the bridge VLAN, so that it can be forwarded according to that + * VLAN. The switchdev should deal with popping the VLAN header in + * hardware on each egress port as appropriate. So only strip the VLAN + * header if forwarding offload is not being used. + */ + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED && + !br_switchdev_frame_uses_tx_fwd_offload(skb)) __vlan_hwaccel_clear_tag(skb); if (p && (p->flags & BR_VLAN_TUNNEL) && @@ -473,7 +489,8 @@ out: static bool __allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid, - u8 *state) + u8 *state, + struct net_bridge_vlan **vlan) { struct pcpu_sw_netstats *stats; struct net_bridge_vlan *v; @@ -538,8 +555,9 @@ static bool __allowed_ingress(const struct net_bridge *br, */ skb->vlan_tci |= pvid; - /* if stats are disabled we can avoid the lookup */ - if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { + /* if snooping and stats are disabled we can avoid the lookup */ + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && + !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); return br_vlan_state_allowed(*state, true); @@ -566,6 +584,8 @@ static bool __allowed_ingress(const struct net_bridge *br, u64_stats_update_end(&stats->syncp); } + *vlan = v; + return true; drop: @@ -575,17 +595,19 @@ drop: bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state) + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan) { /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ + *vlan = NULL; if (!br_opt_get(br, BROPT_VLAN_ENABLED)) { BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; } - return __allowed_ingress(br, vg, skb, vid, state); + return __allowed_ingress(br, vg, skb, vid, state, vlan); } /* Called under RCU. 
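__allowed_ingress() now reports the matched VLAN through a **vlan out-parameter so multicast snooping can reuse the entry without a second lookup, and the no-lookup fast path is taken only when both snooping and per-VLAN stats are off. A small model of that shape, with a toy lookup table standing in for the bridge's rhashtable.

#include <stddef.h>
#include <stdbool.h>

struct vlan { unsigned short vid; };

static struct vlan table[] = { { 1 }, { 10 } };

static struct vlan *vlan_find(unsigned short vid)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].vid == vid)
			return &table[i];
	return NULL;
}

static bool allowed_ingress(unsigned short vid, bool snooping, bool stats,
			    struct vlan **vlan)
{
	*vlan = NULL;
	if (!snooping && !stats)
		return true;		/* fast path: verdict without a lookup */

	*vlan = vlan_find(vid);
	return *vlan != NULL;		/* caller reuses *vlan directly */
}

int main(void)
{
	struct vlan *v;

	return allowed_ingress(10, true, false, &v) && v ? 0 : 1;
}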
*/ @@ -818,14 +840,21 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val, if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val) return 0; + br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val); + err = switchdev_port_attr_set(br->dev, &attr, extack); - if (err && err != -EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) { + br_opt_toggle(br, BROPT_VLAN_ENABLED, !val); return err; + } - br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val); br_manage_promisc(br); recalculate_group_addr(br); br_recalculate_fwd_mask(br); + if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n"); + br_multicast_toggle_vlan_snooping(br, false, NULL); + } return 0; } @@ -1420,6 +1449,33 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, } EXPORT_SYMBOL_GPL(br_vlan_get_info); +int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + p_vinfo->vid = vid; + p_vinfo->flags = v->flags; + if (vid == br_get_pvid(vg)) + p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID; + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu); + static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) { return is_vlan_dev(dev) && @@ -1838,6 +1894,9 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, ASSERT_RTNL(); + if (!nb) + return 0; + if (!netif_is_bridge_master(br_dev)) return -EINVAL; @@ -1884,7 +1943,6 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(br_vlan_replay); /* check if v_curr can enter a range ending in range_end */ bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, @@ -1901,6 +1959,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, u32 dump_flags) { struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL; + bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL); bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS); struct net_bridge_vlan_group *vg; int idx = 0, s_idx = cb->args[1]; @@ -1919,6 +1978,10 @@ static int br_vlan_dump_dev(const struct net_device *dev, vg = br_vlan_group_rcu(br); p = NULL; } else { + /* global options are dumped only for bridge devices */ + if (dump_global) + return 0; + p = br_port_get_rcu(dev); if (WARN_ON(!p)) return -EINVAL; @@ -1941,7 +2004,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, /* idx must stay at range's beginning until it is filled in */ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { - if (!br_vlan_should_use(v)) + if (!dump_global && !br_vlan_should_use(v)) continue; if (idx < s_idx) { idx++; @@ -1954,8 +2017,21 @@ static int br_vlan_dump_dev(const struct net_device *dev, continue; } - if (dump_stats || v->vid == pvid || - !br_vlan_can_enter_range(v, range_end)) { + if (dump_global) { + if (br_vlan_global_opts_can_enter_range(v, range_end)) + continue; + if (!br_vlan_global_opts_fill(skb, range_start->vid, + range_end->vid, + range_start)) { + err = -EMSGSIZE; + break; + } + /* advance number of filled vlans */ + idx += range_end->vid - range_start->vid + 1; + + range_start = v; + } else if (dump_stats || v->vid == pvid || + 
!br_vlan_can_enter_range(v, range_end)) { u16 vlan_flags = br_vlan_flags(range_start, pvid); if (!br_vlan_fill_vids(skb, range_start->vid, @@ -1977,11 +2053,18 @@ static int br_vlan_dump_dev(const struct net_device *dev, * - last vlan (range_start == range_end, not in range) * - last vlan range (range_start != range_end, in range) */ - if (!err && range_start && - !br_vlan_fill_vids(skb, range_start->vid, range_end->vid, - range_start, br_vlan_flags(range_start, pvid), - dump_stats)) - err = -EMSGSIZE; + if (!err && range_start) { + if (dump_global && + !br_vlan_global_opts_fill(skb, range_start->vid, + range_end->vid, range_start)) + err = -EMSGSIZE; + else if (!dump_global && + !br_vlan_fill_vids(skb, range_start->vid, + range_end->vid, range_start, + br_vlan_flags(range_start, pvid), + dump_stats)) + err = -EMSGSIZE; + } cb->args[1] = err ? idx : 0; @@ -2185,12 +2268,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) { - if (nla_type(attr) != BRIDGE_VLANDB_ENTRY) + switch (nla_type(attr)) { + case BRIDGE_VLANDB_ENTRY: + err = br_vlan_rtm_process_one(dev, attr, + nlh->nlmsg_type, + extack); + break; + case BRIDGE_VLANDB_GLOBAL_OPTIONS: + err = br_vlan_rtm_process_global_options(dev, attr, + nlh->nlmsg_type, + extack); + break; + default: continue; + } vlans++; - err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type, - extack); if (err) break; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index b4add9ea8964..4ef975b20185 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -258,3 +258,219 @@ int br_vlan_process_options(const struct net_bridge *br, return err; } + +bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *r_end) +{ + return v_curr->vid - r_end->vid == 1 && + ((v_curr->priv_flags ^ r_end->priv_flags) & + BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0; +} + +bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, + const struct net_bridge_vlan *v_opts) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS); + if (!nest) + return false; + + if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid)) + goto out_err; + + if (vid_range && vid < vid_range && + nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range)) + goto out_err; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))) + goto out_err; +#endif + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + +static size_t rtnl_vlan_global_opts_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_vlan_msg)) + + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */ + + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */ +#endif + + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */ +} + +static void br_vlan_global_opts_notify(const struct net_bridge *br, + u16 vid, u16 vid_range) +{ + struct net_bridge_vlan *v; + struct br_vlan_msg *bvm; + struct nlmsghdr *nlh; + struct sk_buff *skb; + int err = -ENOBUFS; + + /* right now notifications are done only with rtnl held */ + ASSERT_RTNL(); + + skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out_err; + + err = -EMSGSIZE; + nlh = nlmsg_put(skb, 0, 0, 
RTM_NEWVLAN, sizeof(*bvm), 0); + if (!nlh) + goto out_err; + bvm = nlmsg_data(nlh); + memset(bvm, 0, sizeof(*bvm)); + bvm->family = AF_BRIDGE; + bvm->ifindex = br->dev->ifindex; + + /* need to find the vlan due to flags/options */ + v = br_vlan_find(br_vlan_group(br), vid); + if (!v) + goto out_kfree; + + if (!br_vlan_global_opts_fill(skb, vid, vid_range, v)) + goto out_err; + + nlmsg_end(skb, nlh); + rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL); + return; + +out_err: + rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err); +out_kfree: + kfree_skb(skb); +} + +static int br_vlan_process_global_one_opts(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + *changed = false; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) { + u8 mc_snooping; + + mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]); + if (br_multicast_toggle_global_vlan(v, !!mc_snooping)) + *changed = true; + } +#endif + + return 0; +} + +static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = { + [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 }, + [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 }, + [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 }, +}; + +int br_vlan_rtm_process_global_options(struct net_device *dev, + const struct nlattr *attr, + int cmd, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL; + struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1]; + struct net_bridge_vlan_group *vg; + u16 vid, vid_range = 0; + struct net_bridge *br; + int err = 0; + + if (cmd != RTM_NEWVLAN) { + NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation"); + return -EINVAL; + } + if (!netif_is_bridge_master(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device"); + return -EINVAL; + } + br = netdev_priv(dev); + vg = br_vlan_group(br); + if (WARN_ON(!vg)) + return -ENODEV; + + err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr, + br_vlan_db_gpol, extack); + if (err) + return err; + + if (!tb[BRIDGE_VLANDB_GOPTS_ID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id"); + return -EINVAL; + } + vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]); + if (!br_vlan_valid_id(vid, extack)) + return -EINVAL; + + if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) { + vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]); + if (!br_vlan_valid_id(vid_range, extack)) + return -EINVAL; + if (vid >= vid_range) { + NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id"); + return -EINVAL; + } + } else { + vid_range = vid; + } + + for (; vid <= vid_range; vid++) { + bool changed = false; + + v = br_vlan_find(vg, vid); + if (!v) { + NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options"); + err = -ENOENT; + break; + } + + err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed, + extack); + if (err) + break; + + if (changed) { + /* vlan options changed, check for range */ + if (!curr_start) { + curr_start = v; + curr_end = v; + continue; + } + + if (!br_vlan_global_opts_can_enter_range(v, curr_end)) { + br_vlan_global_opts_notify(br, curr_start->vid, + curr_end->vid); + curr_start = v; + } + curr_end = v; + } else { + /* nothing changed and nothing to notify yet */ + if (!curr_start) + continue; + + br_vlan_global_opts_notify(br, curr_start->vid, + curr_end->vid); + curr_start = 
NULL; + curr_end = NULL; + } + } + if (curr_start) + br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid); + + return err; +} diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 12369b604ce9..f6df20808f5e 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -20,9 +20,12 @@ struct j1939_session; enum j1939_sk_errqueue_type { - J1939_ERRQUEUE_ACK, - J1939_ERRQUEUE_SCHED, - J1939_ERRQUEUE_ABORT, + J1939_ERRQUEUE_TX_ACK, + J1939_ERRQUEUE_TX_SCHED, + J1939_ERRQUEUE_TX_ABORT, + J1939_ERRQUEUE_RX_RTS, + J1939_ERRQUEUE_RX_DPO, + J1939_ERRQUEUE_RX_ABORT, }; /* j1939 devices */ @@ -87,6 +90,7 @@ struct j1939_priv { struct list_head j1939_socks; struct kref rx_kref; + u32 rx_tskey; }; void j1939_ecu_put(struct j1939_ecu *ecu); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 54f6d521492f..6dff4510687a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -352,7 +352,7 @@ static void j1939_sk_sock_destruct(struct sock *sk) { struct j1939_sock *jsk = j1939_sk(sk); - /* This function will be call by the generic networking code, when then + /* This function will be called by the generic networking code, when * the socket is ultimately closed (sk->sk_destruct). * * The race between @@ -905,20 +905,33 @@ failure: return NULL; } -static size_t j1939_sk_opt_stats_get_size(void) +static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type) { - return - nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ - 0; + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */ + nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */ + 0; + default: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 0; + } } static struct sk_buff * -j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session, + enum j1939_sk_errqueue_type type) { struct sk_buff *stats; u32 size; - stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC); if (!stats) return NULL; @@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) size = min(session->pkt.tx_acked * 7, session->total_message_size); - nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + nla_put_u32(stats, J1939_NLA_TOTAL_SIZE, + session->total_message_size); + nla_put_u32(stats, J1939_NLA_PGN, + session->skcb.addr.pgn); + nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME, + session->skcb.addr.src_name, J1939_NLA_PAD); + nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME, + session->skcb.addr.dst_name, J1939_NLA_PAD); + nla_put_u8(stats, J1939_NLA_SRC_ADDR, + session->skcb.addr.sa); + nla_put_u8(stats, J1939_NLA_DEST_ADDR, + session->skcb.addr.da); + break; + default: + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + } return stats; } -void j1939_sk_errqueue(struct j1939_session *session, - enum j1939_sk_errqueue_type type) +static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, + enum j1939_sk_errqueue_type type) { struct j1939_priv *priv = session->priv; - struct sock *sk = session->sk; struct j1939_sock *jsk; struct sock_exterr_skb *serr; struct 
sk_buff *skb; char *state = "UNK"; int err; - /* currently we have no sk for the RX session */ - if (!sk) - return; - jsk = j1939_sk(sk); if (!(jsk->state & J1939_SOCK_ERRQUEUE)) return; - skb = j1939_sk_get_timestamping_opt_stats(session); + switch (type) { + case J1939_ERRQUEUE_TX_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + break; + case J1939_ERRQUEUE_TX_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + break; + case J1939_ERRQUEUE_TX_ABORT: + break; + case J1939_ERRQUEUE_RX_RTS: + fallthrough; + case J1939_ERRQUEUE_RX_DPO: + fallthrough; + case J1939_ERRQUEUE_RX_ABORT: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) + return; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + skb = j1939_sk_get_timestamping_opt_stats(session, type); if (!skb) return; @@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session, serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); switch (type) { - case J1939_ERRQUEUE_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_ACK: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_ACK; - state = "ACK"; + state = "TX ACK"; break; - case J1939_ERRQUEUE_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_SCHED: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SCHED; - state = "SCH"; + state = "TX SCH"; break; - case J1939_ERRQUEUE_ABORT: + case J1939_ERRQUEUE_TX_ABORT: serr->ee.ee_errno = session->err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; - state = "ABT"; + state = "TX ABT"; + break; + case J1939_ERRQUEUE_RX_RTS: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_RTS; + state = "RX RTS"; + break; + case J1939_ERRQUEUE_RX_DPO: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_DPO; + state = "RX DPO"; + break; + case J1939_ERRQUEUE_RX_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_ABORT; + state = "RX ABT"; break; - default: - netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); } serr->opt_stats = true; @@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session, kfree_skb(skb); }; +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sock *jsk; + + if (session->sk) { + /* send TX notifications to the socket of origin */ + __j1939_sk_errqueue(session, session->sk, type); + return; + } + + /* spread RX notifications to all sockets subscribed to this session */ + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + if (j1939_sk_recv_match_one(jsk, &session->skcb)) + __j1939_sk_errqueue(session, &jsk->sk, type); + } + spin_unlock_bh(&priv->j1939_socks_lock); +}; + void j1939_sk_send_loop_abort(struct sock *sk, int err) { sk->sk_err = err; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bdc95bd7a851..bb5c4b8979be 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session) static void 
j1939_session_destroy(struct j1939_session *session) { - if (session->err) - j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); - else - j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + if (session->transmission) { + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK); + } else if (session->err) { + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); @@ -776,7 +780,7 @@ static int j1939_session_tx_dpo(struct j1939_session *session) static int j1939_session_tx_dat(struct j1939_session *session) { struct j1939_priv *priv = session->priv; - struct j1939_sk_buff_cb *skcb; + struct j1939_sk_buff_cb *se_skcb; int offset, pkt_done, pkt_end; unsigned int len, pdelay; struct sk_buff *se_skb; @@ -788,7 +792,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (!se_skb) return -ENOBUFS; - skcb = j1939_skb_to_cb(se_skb); + se_skcb = j1939_skb_to_cb(se_skb); tpdat = se_skb->data; ret = 0; pkt_done = 0; @@ -800,7 +804,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) while (session->pkt.tx < pkt_end) { dat[0] = session->pkt.tx - session->pkt.dpo + 1; - offset = (session->pkt.tx * 7) - skcb->offset; + offset = (session->pkt.tx * 7) - se_skcb->offset; len = se_skb->len - offset; if (len > 7) len = 7; @@ -808,7 +812,8 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (offset + len > se_skb->len) { netdev_err_once(priv->ndev, "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n", - __func__, session, skcb->offset, se_skb->len , session->pkt.tx); + __func__, session, se_skcb->offset, + se_skb->len , session->pkt.tx); ret = -EOVERFLOW; goto out_free; } @@ -821,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) memcpy(&dat[1], &tpdat[offset], len); ret = j1939_tp_tx_dat(session, dat, len + 1); if (ret < 0) { - /* ENOBUS == CAN interface TX queue is full */ + /* ENOBUFS == CAN interface TX queue is full */ if (ret != -ENOBUFS) netdev_alert(priv->ndev, "%s: 0x%p: queue data error: %i\n", @@ -1043,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session) if (ret) goto out_free; - j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED); j1939_sk_queue_activate_next(session); out_free: @@ -1097,7 +1102,7 @@ j1939_session_deactivate_activate_next(struct j1939_session *session) } static void __j1939_session_cancel(struct j1939_session *session, - enum j1939_xtp_abort err) + enum j1939_xtp_abort err) { struct j1939_priv *priv = session->priv; @@ -1115,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session, if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static void j1939_session_cancel(struct j1939_session *session, @@ -1195,13 +1202,13 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) static void j1939_session_completed(struct j1939_session *session) { - struct sk_buff *skb; + struct sk_buff *se_skb; if (!session->transmission) { - skb = j1939_session_skb_get(session); + se_skb = j1939_session_skb_get(session); /* distribute among j1939 receivers */ - j1939_sk_recv(session->priv, skb); - consume_skb(skb); + j1939_sk_recv(session->priv, se_skb); + consume_skb(se_skb); } j1939_session_deactivate_activate_next(session); @@ -1268,12 +1275,14 @@ static bool 
j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, break; case J1939_ETP_CMD_RTS: - case J1939_TP_CMD_RTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_RTS: abort = J1939_XTP_ABORT_BUSY; break; case J1939_ETP_CMD_CTS: - case J1939_TP_CMD_CTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_CTS: abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; break; @@ -1282,7 +1291,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, break; case J1939_ETP_CMD_EOMA: - case J1939_TP_CMD_EOMA: /* fall through */ + fallthrough; + case J1939_TP_CMD_EOMA: abort = J1939_XTP_ABORT_OTHER; break; @@ -1326,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, session->err = j1939_xtp_abort_to_errno(priv, abort); if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); j1939_session_deactivate_activate_next(session); abort_put: @@ -1434,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) if (session->transmission) { if (session->pkt.tx_acked) j1939_sk_errqueue(session, - J1939_ERRQUEUE_SCHED); + J1939_ERRQUEUE_TX_SCHED); j1939_session_txtimer_cancel(session); j1939_tp_schedule_txtimer(session, 0); } @@ -1626,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, session->pkt.rx = 0; session->pkt.tx = 0; + session->tskey = priv->rx_tskey++; + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); + WARN_ON_ONCE(j1939_session_activate(session)); return session; @@ -1748,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session, session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); session->last_cmd = dat[0]; j1939_tp_set_rxtimeout(session, 750); + + if (!session->transmission) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO); } static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, @@ -1772,7 +1790,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { struct j1939_priv *priv = session->priv; - struct j1939_sk_buff_cb *skcb; + struct j1939_sk_buff_cb *skcb, *se_skcb; struct sk_buff *se_skb = NULL; const u8 *dat; u8 *tpdat; @@ -1797,7 +1815,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, break; fallthrough; case J1939_TP_CMD_BAM: - case J1939_TP_CMD_CTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_CTS: if (skcb->addr.type != J1939_ETP) break; fallthrough; @@ -1822,8 +1841,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, goto out_session_cancel; } - skcb = j1939_skb_to_cb(se_skb); - offset = packet * 7 - skcb->offset; + se_skcb = j1939_skb_to_cb(se_skb); + offset = packet * 7 - se_skcb->offset; nbytes = se_skb->len - offset; if (nbytes > 7) nbytes = 7; @@ -1851,7 +1870,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (packet == session->pkt.rx) session->pkt.rx++; - if (skcb->addr.type != J1939_ETP && + if (se_skcb->addr.type != J1939_ETP && j1939_cb_is_broadcast(&session->skcb)) { if (session->pkt.rx >= session->pkt.total) final = true; @@ -2000,7 +2019,8 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: - case J1939_TP_CMD_RTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_RTS: if (skcb->addr.type != extd) return; diff --git a/net/can/raw.c b/net/can/raw.c index cd5a49380116..7105fa4824e4 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -592,9 +592,7 @@ static int 
raw_setsockopt(struct socket *sock, int level, int optname, ro->count = count; out_fil: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); rtnl_unlock(); @@ -638,9 +636,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->err_mask = err_mask; out_err: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); rtnl_unlock(); diff --git a/net/core/Makefile b/net/core/Makefile index f7f16650fe9e..35ced6201814 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o -ifeq ($(CONFIG_INET),y) obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o -endif obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o diff --git a/net/core/dev.c b/net/core/dev.c index 8f1a47ad6781..eaaeff404ce9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/****************************************************************************** - * - * Device Boot-time Settings Routines - * - ******************************************************************************/ - -/* Boot time configuration table */ -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; - -/** - * netdev_boot_setup_add - add new setup entry - * @name: name of the device - * @map: configured settings for the device - * - * Adds new setup entry to the dev_boot_setup list. The function - * returns 0 on error and 1 on success. This is a generic routine to - * all netdevices. - */ -static int netdev_boot_setup_add(char *name, struct ifmap *map) -{ - struct netdev_boot_setup *s; - int i; - - s = dev_boot_setup; - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strlcpy(s[i].name, name, IFNAMSIZ); - memcpy(&s[i].map, map, sizeof(s[i].map)); - break; - } - } - - return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -} - -/** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice - * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. - */ -int netdev_boot_setup_check(struct net_device *dev) -{ - struct netdev_boot_setup *s = dev_boot_setup; - int i; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; - return 1; - } - } - return 0; -} -EXPORT_SYMBOL(netdev_boot_setup_check); - - -/** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device - * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. 
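The raw_setsockopt() hunks above drop the `if (dev)` guards because dev_put() accepts a NULL pointer in this kernel, as do the dev_hold() calls simplified in the dev.c hunks below. A sketch of the NULL-tolerant put idiom those cleanups rely on; obj and obj_put() are illustrative names.

#include <stddef.h>

struct obj { int refcnt; };

/* NULL-tolerant release, so error paths can put unconditionally */
static void obj_put(struct obj *o)
{
	if (!o)
		return;
	o->refcnt--;
}

int main(void)
{
	obj_put(NULL);	/* safe: models dev_put(NULL) after this series */
	return 0;
}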
- */ -unsigned long netdev_boot_base(const char *prefix, int unit) -{ - const struct netdev_boot_setup *s = dev_boot_setup; - char name[IFNAMSIZ]; - int i; - - sprintf(name, "%s%d", prefix, unit); - - /* - * If device already registered then return base of 1 - * to indicate not to probe for this interface - */ - if (__dev_get_by_name(&init_net, name)) - return 1; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) - if (!strcmp(name, s[i].name)) - return s[i].map.base_addr; - return 0; -} - -/* - * Saves at boot time configured settings for any netdevice. - */ -int __init netdev_boot_setup(char *str) -{ - int ints[5]; - struct ifmap map; - - str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) - return 0; - - /* Save settings */ - memset(&map, 0, sizeof(map)); - if (ints[0] > 0) - map.irq = ints[1]; - if (ints[0] > 1) - map.base_addr = ints[2]; - if (ints[0] > 2) - map.mem_start = ints[3]; - if (ints[0] > 3) - map.mem_end = ints[4]; - - /* Add new entry to the list */ - return netdev_boot_setup_add(str, &map); -} - -__setup("netdev=", netdev_boot_setup); - /******************************************************************************* * * Device Interface Subroutines @@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -3099,6 +2972,50 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif /** + * netif_set_real_num_queues - set actual number of RX and TX queues used + * @dev: Network device + * @txq: Actual number of TX queues + * @rxq: Actual number of RX queues + * + * Set the real number of both TX and RX queues. + * Does nothing if the number of queues is already correct. + */ +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq) +{ + unsigned int old_rxq = dev->real_num_rx_queues; + int err; + + if (txq < 1 || txq > dev->num_tx_queues || + rxq < 1 || rxq > dev->num_rx_queues) + return -EINVAL; + + /* Start from increases, so the error path only does decreases - + * decreases can't fail. 
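netif_set_real_num_queues() orders the two resizes so the error path stays trivial: attempt the increases first (they can fail), perform the decreases last (they cannot), and on a TX failure undo only the RX increase. A generic model of the grow-first pattern; grow() and shrink() are hypothetical helpers standing in for the per-direction setters.

#include <stdio.h>

/* grow() may fail; shrink() never does */
static int grow(int *cur, int want) { *cur = want; return 0; }
static void shrink(int *cur, int want) { *cur = want; }

static int set_sizes(int *rx, int *tx, int new_rx, int new_tx)
{
	int old_rx = *rx;
	int err;

	if (new_rx > *rx && (err = grow(rx, new_rx)))
		return err;
	if (new_tx > *tx && (err = grow(tx, new_tx))) {
		shrink(rx, old_rx);	/* undo path only ever shrinks */
		return err;
	}
	if (new_rx < *rx)
		shrink(rx, new_rx);
	if (new_tx < *tx)
		shrink(tx, new_tx);
	return 0;
}

int main(void)
{
	int rx = 4, tx = 4;

	set_sizes(&rx, &tx, 8, 2);
	printf("rx=%d tx=%d\n", rx, tx);	/* rx=8 tx=2 */
	return 0;
}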
+ */ + if (rxq > dev->real_num_rx_queues) { + err = netif_set_real_num_rx_queues(dev, rxq); + if (err) + return err; + } + if (txq > dev->real_num_tx_queues) { + err = netif_set_real_num_tx_queues(dev, txq); + if (err) + goto undo_rx; + } + if (rxq < dev->real_num_rx_queues) + WARN_ON(netif_set_real_num_rx_queues(dev, rxq)); + if (txq < dev->real_num_tx_queues) + WARN_ON(netif_set_real_num_tx_queues(dev, txq)); + + return 0; +undo_rx: + WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq)); + return err; +} +EXPORT_SYMBOL(netif_set_real_num_queues); + +/** * netif_get_num_default_rss_queues - default number of RSS queues * * This routine should set an upper limit on the number of RSS queues @@ -4012,7 +3929,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) qdisc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -4756,45 +4673,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) return rxqueue; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; - u32 metalen, act = XDP_DROP; bool orig_bcast, orig_host; u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; + u32 metalen, act; int off; - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_is_redirected(skb)) - return XDP_PASS; - - /* XDP packets must be linear and must have sufficient headroom - * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also - * native XDP provides, thus we need to do it here as well. - */ - if (skb_cloned(skb) || skb_is_nonlinear(skb) || - skb_headroom(skb) < XDP_PACKET_HEADROOM) { - int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); - int troom = skb->tail + skb->data_len - skb->end; - - /* In case we have to go down the path and also linearize, - * then lets do the pskb_expand_head() work just once here. - */ - if (pskb_expand_head(skb, - hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, - troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) - goto do_drop; - if (skb_linearize(skb)) - goto do_drop; - } - /* The XDP program wants to see the packet starting at the MAC * header. */ @@ -4849,6 +4739,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, skb->protocol = eth_type_trans(skb, skb->dev); } + /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull + * before calling us again on redirect path. We do not call do_redirect + * as we leave that up to the caller. + * + * Caller is responsible for managing lifetime of skb (i.e. calling + * kfree_skb in response to actions it cannot handle/XDP_DROP). + */ switch (act) { case XDP_REDIRECT: case XDP_TX: @@ -4859,6 +4756,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (metalen) skb_metadata_set(skb, metalen); break; + } + + return act; +} + +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + u32 act = XDP_DROP; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. 
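The comment above states the contract of the newly exported bpf_prog_run_generic_xdp(): it returns the raw verdict and the caller keeps ownership of the skb, so any verdict the caller cannot handle must end in an explicit free. A sketch of a caller honoring that contract; the pkt type, xdp_run() and the verdict enum are stand-ins, not the XDP API.

#include <stdio.h>
#include <stdlib.h>

enum verdict { VERDICT_DROP, VERDICT_PASS, VERDICT_TX, VERDICT_REDIRECT };

struct pkt { int id; };	/* stand-in for struct sk_buff */

static enum verdict xdp_run(struct pkt *p)
{
	return (p->id & 1) ? VERDICT_PASS : VERDICT_DROP;
}

static void pkt_free(struct pkt *p) { free(p); }

static void deliver(struct pkt *p)
{
	printf("pass %d\n", p->id);
	free(p);
}

/* Every verdict the caller does not forward falls through to an
 * explicit free, as the lifetime comment above requires.
 */
static void rx(struct pkt *p)
{
	switch (xdp_run(p)) {
	case VERDICT_PASS:
		deliver(p);
		break;
	case VERDICT_TX:
	case VERDICT_REDIRECT:
		pkt_free(p);	/* simplified: real code queues it for tx */
		break;
	default:
		pkt_free(p);	/* VERDICT_DROP and unknown verdicts */
		break;
	}
}

int main(void)
{
	struct pkt *p = malloc(sizeof(*p));

	if (!p)
		return 1;
	p->id = 1;
	rx(p);
	return 0;
}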
+ */ + if (skb_is_redirected(skb)) + return XDP_PASS; + + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_cloned(skb) || skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (skb_linearize(skb)) + goto do_drop; + } + + act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog); + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + case XDP_PASS: + break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -5141,8 +5081,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, - &cl_res, false)) { + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -5324,7 +5263,6 @@ another_round: ret = NET_RX_DROP; goto out; } - skb_reset_mac_len(skb); } if (eth_type_vlan(skb->protocol)) { @@ -5650,25 +5588,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) struct bpf_prog *new = xdp->prog; int ret = 0; - if (new) { - u32 i; - - mutex_lock(&new->aux->used_maps_mutex); - - /* generic XDP does not work with DEVMAPs that can - * have a bpf_prog installed on an entry - */ - for (i = 0; i < new->aux->used_map_cnt; i++) { - if (dev_map_can_have_prog(new->aux->used_maps[i]) || - cpu_map_prog_allowed(new->aux->used_maps[i])) { - mutex_unlock(&new->aux->used_maps_mutex); - return -EINVAL; - } - } - - mutex_unlock(&new->aux->used_maps_mutex); - } - switch (xdp->command) { case XDP_SETUP_PROG: rcu_assign_pointer(dev->xdp_prog, new); @@ -5876,7 +5795,7 @@ static void flush_all_backlogs(void) */ ASSERT_RTNL(); - get_online_cpus(); + cpus_read_lock(); cpumask_clear(&flush_cpus); for_each_online_cpu(cpu) { @@ -5894,7 +5813,7 @@ static void flush_all_backlogs(void) for_each_cpu(cpu, &flush_cpus) flush_work(per_cpu_ptr(&flush_works, cpu)); - put_online_cpus(); + cpus_read_unlock(); } /* Pass the currently batched GRO_NORMAL SKBs up to the stack. 
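The requirement restated above — generic XDP needs a linear skb with at least XDP_PACKET_HEADROOM of headroom — boils down to computing how much head and tail room pskb_expand_head() must add. A toy calculation of those two quantities; 256 is XDP_PACKET_HEADROOM and the pad constant is a stand-in for the NET_SKB_PAD alignment.

#include <stdio.h>

#define PKT_HEADROOM 256	/* XDP_PACKET_HEADROOM */
#define PAD 64			/* stand-in for NET_SKB_PAD */

/* Head grows to at least 256 bytes, rounded up to the pad; tail grows
 * enough (plus slack) to pull any paged data into the linear area.
 */
static void xdp_expand_needs(int headroom, int tail_overflow,
			     int *hroom, int *troom)
{
	int need = PKT_HEADROOM - headroom;

	*hroom = need > 0 ? (need + PAD - 1) / PAD * PAD : 0;
	*troom = tail_overflow > 0 ? tail_overflow + 128 : 0;
}

int main(void)
{
	int h, t;

	xdp_expand_needs(64, 0, &h, &t);
	printf("head +%d, tail +%d\n", h, t);	/* head +192, tail +0 */
	return 0;
}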
*/ @@ -6011,7 +5930,6 @@ static void gro_list_prepare(const struct list_head *head, diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); if (skb_vlan_tag_present(p)) diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb); - diffs |= skb_metadata_dst_cmp(p, skb); diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), @@ -6021,17 +5939,30 @@ static void gro_list_prepare(const struct list_head *head, skb_mac_header(skb), maclen); - diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); + /* in most common scenarions 'slow_gro' is 0 + * otherwise we are already on some slower paths + * either skip all the infrequent tests altogether or + * avoid trying too hard to skip each of them individually + */ + if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) { +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *skb_ext; + struct tc_skb_ext *p_ext; +#endif + + diffs |= p->sk != skb->sk; + diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); + #if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (!diffs) { - struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT); - struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT); + skb_ext = skb_ext_find(skb, TC_SKB_EXT); + p_ext = skb_ext_find(p, TC_SKB_EXT); diffs |= (!!p_ext) ^ (!!skb_ext); if (!diffs && unlikely(skb_ext)) diffs |= p_ext->chain ^ skb_ext->chain; - } #endif + } NAPI_GRO_CB(p)->same_flow = !diffs; } @@ -6296,8 +6227,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); - skb_ext_reset(skb); - nf_reset_ct(skb); + if (unlikely(skb->slow_gro)) { + skb_orphan(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); + skb->slow_gro = 0; + } napi->skb = skb; } @@ -10134,7 +10069,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!rx) return -ENOMEM; @@ -10201,7 +10136,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!tx) return -ENOMEM; @@ -10841,7 +10776,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!p) return NULL; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 478d032f34ac..0e87237fd871 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kmod.h> #include <linux/netdevice.h> +#include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <linux/wireless.h> +#include <linux/if_bridge.h> #include <net/dsa.h> #include <net/wext.h> @@ -25,79 +27,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr) return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } -static gifconf_func_t *gifconf_list[NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - 
* - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t *gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} -EXPORT_SYMBOL(register_gifconf); - /* * Perform a SIOCGIFCONF call. This structure will change * size eventually, and there is nothing I can do about it. * Thus we will need a 'compatibility mode'. */ - -int dev_ifconf(struct net *net, struct ifconf *ifc, int size) +int dev_ifconf(struct net *net, struct ifconf __user *uifc) { struct net_device *dev; - char __user *pos; - int len; - int total; - int i; + void __user *pos; + size_t size; + int len, total = 0, done; - /* - * Fetch the caller's info block. - */ + /* both the ifconf and the ifreq structures are slightly different */ + if (in_compat_syscall()) { + struct compat_ifconf ifc32; - pos = ifc->ifc_buf; - len = ifc->ifc_len; + if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf))) + return -EFAULT; - /* - * Loop over the interfaces, and write an info block for each. - */ + pos = compat_ptr(ifc32.ifcbuf); + len = ifc32.ifc_len; + size = sizeof(struct compat_ifreq); + } else { + struct ifconf ifc; + + if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) + return -EFAULT; - total = 0; + pos = ifc.ifc_buf; + len = ifc.ifc_len; + size = sizeof(struct ifreq); + } + + /* Loop over the interfaces, and write an info block for each. */ + rtnl_lock(); for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0, size); - else - done = gifconf_list[i](dev, pos + total, - len - total, size); - if (done < 0) - return -EFAULT; - total += done; - } + if (!pos) + done = inet_gifconf(dev, NULL, 0, size); + else + done = inet_gifconf(dev, pos + total, + len - total, size); + if (done < 0) { + rtnl_unlock(); + return -EFAULT; } + total += done; } + rtnl_unlock(); - /* - * All done. Write the updated control block back to the caller. - */ - ifc->ifc_len = total; + return put_user(total, &uifc->ifc_len); +} + +static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) +{ + struct ifmap *ifmap = &ifr->ifr_map; + + if (in_compat_syscall()) { + struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap; + + cifmap->mem_start = dev->mem_start; + cifmap->mem_end = dev->mem_end; + cifmap->base_addr = dev->base_addr; + cifmap->irq = dev->irq; + cifmap->dma = dev->dma; + cifmap->port = dev->if_port; + + return 0; + } + + ifmap->mem_start = dev->mem_start; + ifmap->mem_end = dev->mem_end; + ifmap->base_addr = dev->base_addr; + ifmap->irq = dev->irq; + ifmap->dma = dev->dma; + ifmap->port = dev->if_port; - /* - * Both BSD and Solaris return 0 here, so we do too. 
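For illustration (not part of the patch), a minimal userspace sketch of the SIOCGIFCONF flow that the reworked dev_ifconf() above now serves from a single code path; the kernel picks sizeof(struct compat_ifreq) or sizeof(struct ifreq) via in_compat_syscall() and writes the number of filled bytes back into ifc_len:

	#include <net/if.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		struct ifreq reqs[32];
		struct ifconf ifc = { .ifc_len = sizeof(reqs), .ifc_req = reqs };
		int fd = socket(AF_INET, SOCK_DGRAM, 0);
		int i;

		if (fd < 0 || ioctl(fd, SIOCGIFCONF, &ifc) < 0)
			return 1;
		/* ifc_len now holds the bytes written, not the buffer size */
		for (i = 0; i < ifc.ifc_len / (int)sizeof(struct ifreq); i++)
			printf("%s\n", reqs[i].ifr_name);
		close(fd);
		return 0;
	}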
- */ return 0; } +static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +{ + struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; + + if (!dev->netdev_ops->ndo_set_config) + return -EOPNOTSUPP; + + if (in_compat_syscall()) { + struct ifmap ifmap = { + .mem_start = cifmap->mem_start, + .mem_end = cifmap->mem_end, + .base_addr = cifmap->base_addr, + .irq = cifmap->irq, + .dma = cifmap->dma, + .port = cifmap->port, + }; + + return dev->netdev_ops->ndo_set_config(dev, &ifmap); + } + + return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map); +} + /* * Perform the SIOCxIFxxx calls, inside rcu_read_lock() */ @@ -128,13 +159,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm break; case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; + return dev_getifmap(dev, ifr); case SIOCGIFINDEX: ifr->ifr_ifindex = dev->ifindex; @@ -215,19 +240,19 @@ static int net_hwtstamp_validate(struct ifreq *ifr) return 0; } -static int dev_do_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) +static int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; int err; - err = dsa_ndo_do_ioctl(dev, ifr, cmd); + err = dsa_ndo_eth_ioctl(dev, ifr, cmd); if (err == 0 || err != -EOPNOTSUPP) return err; - if (ops->ndo_do_ioctl) { + if (ops->ndo_eth_ioctl) { if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); + err = ops->ndo_eth_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -235,10 +260,55 @@ static int dev_do_ioctl(struct net_device *dev, return err; } +static int dev_siocbond(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_siocbond) { + if (netif_device_present(dev)) + return ops->ndo_siocbond(dev, ifr, cmd); + else + return -ENODEV; + } + + return -EOPNOTSUPP; +} + +static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_siocdevprivate) { + if (netif_device_present(dev)) + return ops->ndo_siocdevprivate(dev, ifr, data, cmd); + else + return -ENODEV; + } + + return -EOPNOTSUPP; +} + +static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_siocwandev) { + if (netif_device_present(dev)) + return ops->ndo_siocwandev(dev, ifs); + else + return -ENODEV; + } + + return -EOPNOTSUPP; +} + /* * Perform the SIOCxIFxxx calls, inside rtnl_lock() */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, + unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); @@ -275,12 +345,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; + return dev_setifmap(dev, ifr); case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || @@ -307,6 +372,22 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) 
ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCWANDEV: + return dev_siocwandev(dev, &ifr->ifr_settings); + + case SIOCBRADDIF: + case SIOCBRDELIF: + if (!netif_device_present(dev)) + return -ENODEV; + if (!netif_is_bridge_master(dev)) + return -EOPNOTSUPP; + dev_hold(dev); + rtnl_unlock(); + err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); + dev_put(dev); + rtnl_lock(); + return err; + case SIOCSHWTSTAMP: err = net_hwtstamp_validate(ifr); if (err) @@ -317,23 +398,23 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) * Unknown or private ioctl */ default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || + if (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) + return dev_siocdevprivate(dev, ifr, data, cmd); + + if (cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCSHWTSTAMP || + cmd == SIOCGHWTSTAMP) { + err = dev_eth_ioctl(dev, ifr, cmd); + } else if (cmd == SIOCBONDENSLAVE || cmd == SIOCBONDRELEASE || cmd == SIOCBONDSETHWADDR || cmd == SIOCBONDSLAVEINFOQUERY || cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCGHWTSTAMP || - cmd == SIOCWANDEV) { - err = dev_do_ioctl(dev, ifr, cmd); + cmd == SIOCBONDCHANGEACTIVE) { + err = dev_siocbond(dev, ifr, cmd); } else err = -EINVAL; @@ -386,7 +467,8 @@ EXPORT_SYMBOL(dev_load); * positive or a negative errno code on error. */ -int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout) +int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, + void __user *data, bool *need_copyout) { int ret; char *colon; @@ -437,7 +519,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCETHTOOL: dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = dev_ethtool(net, ifr); + ret = dev_ethtool(net, ifr, data); rtnl_unlock(); if (colon) *colon = ':'; @@ -456,7 +538,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); - ret = dev_ifsioc(net, ifr, cmd); + ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); if (colon) *colon = ':'; @@ -502,7 +584,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCBONDINFOQUERY: dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, ifr, cmd); + ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); if (need_copyout) *need_copyout = false; @@ -527,7 +609,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, ifr, cmd); + ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); return ret; } diff --git a/net/core/devlink.c b/net/core/devlink.c index 85032626de24..8fa015319af6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -108,19 +108,6 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); -static void __devlink_net_set(struct devlink *devlink, struct net *net) -{ - write_pnet(&devlink->_net, net); -} - -void devlink_net_set(struct devlink *devlink, struct net *net) -{ - if (WARN_ON(devlink->registered)) - return; - __devlink_net_set(devlink, net); -} -EXPORT_SYMBOL_GPL(devlink_net_set); - static 
struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -1043,7 +1030,7 @@ static void devlink_port_notify(struct devlink_port *devlink_port, struct sk_buff *msg; int err; - if (!devlink_port->registered) + if (!devlink_port->devlink) return; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); @@ -3801,10 +3788,12 @@ static void devlink_param_notify(struct devlink *devlink, struct devlink_param_item *param_item, enum devlink_command cmd); -static void devlink_reload_netns_change(struct devlink *devlink, - struct net *dest_net) +static void devlink_ns_change_notify(struct devlink *devlink, + struct net *dest_net, struct net *curr_net, + bool new) { struct devlink_param_item *param_item; + enum devlink_command cmd; /* Userspace needs to be notified about devlink objects * removed from original and entering new network namespace. @@ -3812,17 +3801,18 @@ static void devlink_reload_netns_change(struct devlink *devlink, * reload process so the notifications are generated separately. */ - list_for_each_entry(param_item, &devlink->param_list, list) - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_DEL); - devlink_notify(devlink, DEVLINK_CMD_DEL); + if (!dest_net || net_eq(dest_net, curr_net)) + return; - __devlink_net_set(devlink, dest_net); + if (new) + devlink_notify(devlink, DEVLINK_CMD_NEW); - devlink_notify(devlink, DEVLINK_CMD_NEW); + cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL; list_for_each_entry(param_item, &devlink->param_list, list) - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, 0, param_item, cmd); + + if (!new) + devlink_notify(devlink, DEVLINK_CMD_DEL); } static bool devlink_reload_supported(const struct devlink_ops *ops) @@ -3902,6 +3892,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, u32 *actions_performed, struct netlink_ext_ack *extack) { u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; + struct net *curr_net; int err; if (!devlink->reload_enabled) @@ -3909,18 +3900,22 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); + + curr_net = devlink_net(devlink); + devlink_ns_change_notify(devlink, dest_net, curr_net, false); err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack); if (err) return err; - if (dest_net && !net_eq(dest_net, devlink_net(devlink))) - devlink_reload_netns_change(devlink, dest_net); + if (dest_net && !net_eq(dest_net, curr_net)) + write_pnet(&devlink->_net, dest_net); err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); if (err) return err; + devlink_ns_change_notify(devlink, dest_net, curr_net, true); WARN_ON(!(*actions_performed & BIT(action))); /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, @@ -8768,15 +8763,18 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) } /** - * devlink_alloc - Allocate new devlink instance resources + * devlink_alloc_ns - Allocate new devlink instance resources + * in a specific namespace * * @ops: ops * @priv_size: size of user private data + * @net: net namespace * * Allocate new devlink instance resources, including devlink index * and name.
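 *
 * (Illustration, not part of the patch: the matching header change is
 *  not shown in this hunk, but devlink_alloc() presumably survives as a
 *  thin init_net wrapper so existing drivers keep their call sites:
 *
 *	static inline struct devlink *
 *	devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 *	{
 *		return devlink_alloc_ns(ops, priv_size, &init_net);
 *	}
 *
 *  while namespace-aware callers allocate directly in the target netns.)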
*/ -struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net) { struct devlink *devlink; @@ -8791,7 +8789,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) return NULL; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); - __devlink_net_set(devlink, &init_net); + write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->sb_list); @@ -8807,7 +8805,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) mutex_init(&devlink->reporters_lock); return devlink; } -EXPORT_SYMBOL_GPL(devlink_alloc); +EXPORT_SYMBOL_GPL(devlink_alloc_ns); /** * devlink_register - Register devlink instance @@ -8817,8 +8815,8 @@ EXPORT_SYMBOL_GPL(devlink_alloc); */ int devlink_register(struct devlink *devlink, struct device *dev) { + WARN_ON(devlink->dev); devlink->dev = dev; - devlink->registered = true; mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -8960,9 +8958,10 @@ int devlink_port_register(struct devlink *devlink, mutex_unlock(&devlink->lock); return -EEXIST; } + + WARN_ON(devlink_port->devlink); devlink_port->devlink = devlink; devlink_port->index = port_index; - devlink_port->registered = true; spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); @@ -9001,7 +9000,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, void *type_dev) { - if (WARN_ON(!devlink_port->registered)) + if (WARN_ON(!devlink_port->devlink)) return; devlink_port_type_warn_cancel(devlink_port); spin_lock_bh(&devlink_port->type_lock); @@ -9121,7 +9120,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; devlink_port->attrs = *attrs; ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); @@ -9145,7 +9144,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); @@ -9172,7 +9171,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); @@ -9200,7 +9199,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ead2a8aa57b4..49442cae6f69 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - if (hw_metadata->input_dev) - dev_hold(hw_metadata->input_dev); + 
dev_hold(hw_metadata->input_dev); return hw_metadata; @@ -867,8 +866,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata) { - if (hw_metadata->input_dev) - dev_put(hw_metadata->input_dev); + dev_put(hw_metadata->input_dev); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index fb3bcba87744..497ef9b3fc6a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - if (dst->dev) - dev_put(dst->dev); + dev_put(dst->dev); lwtstate_put(dst->lwtstate); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a9f937975080..79df7cd9dbc1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, { struct fib_rule *r; - r = kzalloc(ops->rule_size, GFP_KERNEL); + r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (r == NULL) return -ENOMEM; @@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - nlrule = kzalloc(ops->rule_size, GFP_KERNEL); + nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (!nlrule) { err = -ENOMEM; goto errout; diff --git a/net/core/filter.c b/net/core/filter.c index d70187ce851b..6f493ef5bb14 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,6 +77,7 @@ #include <net/transp_v6.h> #include <linux/btf_ids.h> #include <net/tls.h> +#include <net/xdp.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -2179,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); @@ -2213,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } rcu_read_unlock_bh(); if (dst) - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -2286,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); @@ -3880,8 +3864,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) if (unlikely(meta < xdp_frame_end || meta > xdp->data)) return -EINVAL; - if (unlikely((metalen & (sizeof(__u32) - 1)) || - (metalen > 32))) + if (unlikely(xdp_metalen_invalid(metalen))) return -EACCES; xdp->data_meta = meta; @@ -4040,8 +4023,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, 
goto err; consume_skb(skb); break; + case BPF_MAP_TYPE_CPUMAP: + err = cpu_map_generic_redirect(fwd, skb); + if (unlikely(err)) + goto err; + break; default: - /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ err = -EBADRQC; goto err; } @@ -5012,6 +4999,40 @@ err_clear: return -EINVAL; } +BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_proto = { + .func = bpf_sk_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_getsockopt_proto = { + .func = bpf_sk_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 4b2415d34873..bac0184cf3de 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1056,8 +1056,10 @@ proto_again: FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); - memcpy(&key_addrs->v4addrs, &iph->saddr, - sizeof(key_addrs->v4addrs)); + memcpy(&key_addrs->v4addrs.src, &iph->saddr, + sizeof(key_addrs->v4addrs.src)); + memcpy(&key_addrs->v4addrs.dst, &iph->daddr, + sizeof(key_addrs->v4addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } @@ -1101,8 +1103,10 @@ proto_again: FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); - memcpy(&key_addrs->v6addrs, &iph->saddr, - sizeof(key_addrs->v6addrs)); + memcpy(&key_addrs->v6addrs.src, &iph->saddr, + sizeof(key_addrs->v6addrs.src)); + memcpy(&key_addrs->v6addrs.dst, &iph->daddr, + sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 8ec7d13d2860..d0ae987d2de9 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -43,6 +43,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "SEG6LOCAL"; case LWTUNNEL_ENCAP_RPL: return "RPL"; + case LWTUNNEL_ENCAP_IOAM6: + return "IOAM6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 53e85c70c6e5..b963d6b02c4f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { - if (dev) - dev_put(dev); + dev_put(dev); kfree(n); n = NULL; goto out; @@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); return 0; } @@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if 
(tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); } return -ENOENT; @@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - if (parms->dev) - dev_put(parms->dev); + dev_put(parms->dev); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -3315,12 +3310,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); + seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } - seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx %08lx %08lx\n", + seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " + "%08lx %08lx %08lx " + "%08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e258d255e90..314f97acf39d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1190,11 +1190,6 @@ static ssize_t pktgen_if_write(struct file *file, * pktgen_xmit() is called */ pkt_dev->last_ok = 1; - - /* override clone_skb if user passed default value - * at module loading time - */ - pkt_dev->clone_skb = 0; } else if (strcmp(f, "queue_xmit") == 0) { pkt_dev->xmit_mode = M_QUEUE_XMIT; pkt_dev->last_ok = 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f6af3e74fc44..7c9d32cfe607 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -710,15 +710,8 @@ out: int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; - int err = 0; - NETLINK_CB(skb).dst_group = group; - if (echo) - refcount_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); - if (echo) - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - return err; + return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL); } int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) @@ -733,12 +726,8 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; - int report = 0; - if (nlh) - report = nlmsg_report(nlh); - - nlmsg_notify(rtnl, skb, pid, group, report, flags); + nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags); } EXPORT_SYMBOL(rtnl_notify); @@ -2268,7 +2257,8 @@ invalid_attr: return -EINVAL; } -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) +static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { if (dev) { if (tb[IFLA_ADDRESS] && @@ -2295,7 +2285,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, af); + err = af_ops->validate_link_af(dev, af, extack); if (err < 0) return err; } @@ -2603,7 +2593,7 @@ static int do_setlink(const struct sk_buff *skb, const struct net_device_ops *ops = dev->netdev_ops; int err; - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; @@ -3301,7 +3291,7 @@ replay: m_ops = 
master_dev->rtnl_link_ops; } - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; diff --git a/net/core/scm.c b/net/core/scm.c index ae3085d9aae8..5c356f0dee30 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) { - fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); + fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT); if (!fpl) return -ENOMEM; *fplp = fpl; @@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) return NULL; new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); diff --git a/net/core/selftests.c b/net/core/selftests.c index ba7b0171974c..9077fa969892 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev) return __net_test_loopback(ndev, &attr); } +static int net_test_phy_loopback_udp_mtu(struct net_device *ndev) +{ + struct net_packet_attrs attr = { }; + + attr.dst = ndev->dev_addr; + attr.max_size = ndev->mtu; + return __net_test_loopback(ndev, &attr); +} + static int net_test_phy_loopback_tcp(struct net_device *ndev) { struct net_packet_attrs attr = { }; @@ -345,6 +354,9 @@ static const struct net_test { .name = "PHY internal loopback, UDP ", .fn = net_test_phy_loopback_udp, }, { + .name = "PHY internal loopback, MTU ", + .fn = net_test_phy_loopback_udp_mtu, + }, { .name = "PHY internal loopback, TCP ", .fn = net_test_phy_loopback_tcp, }, { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fc7942c0dddc..9240af2ea8c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb) void napi_skb_free_stolen_head(struct sk_buff *skb) { - nf_reset_ct(skb); - skb_dst_drop(skb); - skb_ext_put(skb); + if (unlikely(skb->slow_gro)) { + nf_reset_ct(skb); + skb_dst_drop(skb); + skb_ext_put(skb); + skb_orphan(skb); + skb->slow_gro = 0; + } napi_skb_cache_put(skb); } @@ -1786,6 +1790,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) EXPORT_SYMBOL(skb_realloc_headroom); /** + * skb_expand_head - reallocate header of &sk_buff + * @skb: buffer to reallocate + * @headroom: needed headroom + * + * Unlike skb_realloc_headroom, this one does not allocate a new skb + * if possible; it copies skb->sk to the new skb as needed + * and frees the original skb in case of failure. + * + * It expects an increased headroom and generates a warning otherwise.
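 *
 * (Illustration, not part of the patch: the intended call pattern,
 *  mirroring the bpf_out_neigh_*() hunks above, is
 *
 *	skb = skb_expand_head(skb, hh_len);
 *	if (!skb)
 *		return -ENOMEM;	// skb_expand_head() already freed it
 *
 *  so callers simply replace their skb pointer and need no error-path
 *  cleanup of their own.)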
+ */ + +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) +{ + int delta = headroom - skb_headroom(skb); + + if (WARN_ONCE(delta <= 0, + "%s is expecting an increase in the headroom", __func__)) + return skb; + + /* pskb_expand_head() might crash if skb is shared */ + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (likely(nskb)) { + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + consume_skb(skb); + } else { + kfree_skb(skb); + } + skb = nskb; + } + if (skb && + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); + skb = NULL; + } + return skb; +} +EXPORT_SYMBOL(skb_expand_head); + +/** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head @@ -3889,6 +3935,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->last = skb; NAPI_GRO_CB(p)->count++; p->data_len += skb->len; + + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -4256,6 +4305,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; + unsigned int new_truesize; struct sk_buff *lp; if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) @@ -4287,10 +4337,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ - delta_truesize = skb->truesize - - SKB_TRUESIZE(skb_end_offset(skb)); + new_truesize = SKB_TRUESIZE(skb_end_offset(skb)); + delta_truesize = skb->truesize - new_truesize; - skb->truesize -= skb->data_len; + skb->truesize = new_truesize; skb->len -= skb->data_len; skb->data_len = 0; @@ -4319,12 +4369,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We don't need to clear skbinfo->nr_frags here */ - delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); + delta_truesize = skb->truesize - new_truesize; + skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } merge: + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; @@ -6449,6 +6503,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) new->chunks = newlen; new->offset[id] = newoff; set_active: + skb->slow_gro = 1; skb->extensions = new; skb->active_extensions |= 1 << id; return skb_ext_get_ptr(new, id); diff --git a/net/core/sock.c b/net/core/sock.c index a3eea6e0b30a..aada649e07e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -226,6 +226,7 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ + x "AF_MCTP" , \ x "AF_MAX" static const char *const af_family_key_strings[AF_MAX+1] = { @@ -1357,6 +1358,15 @@ set_sndbuf: ret = sock_bindtoindex_locked(sk, val); break; + case SO_BUF_LOCK: + if (val & ~SOCK_BUF_LOCK_MASK) { + ret = -EINVAL; + break; + } + sk->sk_userlocks = val | (sk->sk_userlocks & + ~SOCK_BUF_LOCK_MASK); + break; + default: ret = -ENOPROTOOPT; break; @@
-1719,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_net(sk)->net_cookie; break; + case SO_BUF_LOCK: + v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 60decd6420ca..ae5fa4338d9c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -211,8 +211,6 @@ out: return psock; } -static bool sock_map_redirect_allowed(const struct sock *sk); - static int sock_map_link(struct bpf_map *map, struct sock *sk) { struct sk_psock_progs *progs = sock_map_progs(map); @@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) struct sk_psock *psock; int ret; - /* Only sockets we can redirect into/from in BPF need to hold - * refs to parser/verdict progs and have their sk_data_ready - * and sk_write_space callbacks overridden. - */ - if (!sock_map_redirect_allowed(sk)) - goto no_progs; - stream_verdict = READ_ONCE(progs->stream_verdict); if (stream_verdict) { stream_verdict = bpf_prog_inc_not_zero(stream_verdict); @@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) } } -no_progs: psock = sock_map_psock_get_checked(sk); if (IS_ERR(psock)) { ret = PTR_ERR(psock); @@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk) sk->sk_protocol == IPPROTO_TCP; } -static bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - static bool sock_map_redirect_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) @@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); - else if (sk_is_udp(sk)) - return sk_hashed(sk); - - return false; + return true; } static int sock_hash_update_common(struct bpf_map *map, void *key, @@ -1536,6 +1517,7 @@ void sock_map_close(struct sock *sk, long timeout) release_sock(sk); saved_close(sk, timeout); } +EXPORT_SYMBOL_GPL(sock_map_close); static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7eb0fb231940..abb5c596a817 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1126,7 +1126,7 @@ static int __init dccp_init(void) dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_hashinfo2; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d1c50a48614b..0ee7d4c0c955 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force) } spin_unlock(&dndev_lock); - if (old) - dev_put(old); + dev_put(old); return rv; } @@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev) } spin_unlock(&dndev_lock); - if (dev) - dev_put(dev); + dev_put(dev); } /* diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 77fbf8e9df4b..269c029ad74f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) } change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); + dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); kfree(fi); @@ -102,7 +101,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) void dn_fib_release_info(struct dn_fib_info 
*fi) { spin_lock(&dn_fib_info_lock); - if (fi && --fi->fib_treeref == 0) { + if (fi && refcount_dec_and_test(&fi->fib_treeref)) { if (fi->fib_next) fi->fib_next->fib_prev = fi->fib_prev; if (fi->fib_prev) @@ -385,11 +384,11 @@ link_it: if ((ofi = dn_fib_find_info(fi)) != NULL) { fi->fib_dead = 1; dn_fib_free_info(fi); - ofi->fib_treeref++; + refcount_inc(&ofi->fib_treeref); return ofi; } - fi->fib_treeref++; + refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 729d3de6020d..7e85f2a1ae25 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1026,8 +1026,7 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); err = -EINVAL; dev_out = init_net.loopback_dev; if (!dev_out->dn_ptr) @@ -1084,8 +1083,7 @@ source_ok: neigh_release(neigh); neigh = NULL; } else { - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; @@ -1144,8 +1142,7 @@ select_source: if (res.type == RTN_LOCAL) { if (!fld.saddr) fld.saddr = fld.daddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); if (!dev_out->dn_ptr) @@ -1168,8 +1165,7 @@ select_source: if (!fld.saddr) fld.saddr = DN_FIB_RES_PREFSRC(res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); fld.flowidn_oif = dev_out->ifindex; @@ -1222,8 +1218,7 @@ done: neigh_release(neigh); if (free_res) dn_fib_res_put(&res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); out: return err; @@ -1503,8 +1498,7 @@ done: if (free_res) dn_fib_res_put(&res); dev_put(in_dev); - if (out_dev) - dev_put(out_dev); + dev_put(out_dev); out: return err; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 00bb89b2d86f..970906eb5b2c 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -18,16 +18,6 @@ if NET_DSA # Drivers must select the appropriate tagging format(s) -config NET_DSA_TAG_8021Q - tristate - select VLAN_8021Q - help - Unlike the other tagging protocols, the 802.1Q config option simply - provides helpers for other tagging implementations that might rely on - VLAN in one way or another. It is not a complete solution. - - Drivers which use these helpers should select this as dependency. - config NET_DSA_TAG_AR9331 tristate "Tag driver for Atheros AR9331 SoC with built-in switch" help @@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" depends on MSCC_OCELOT_SWITCH_LIB || \ (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) - select NET_DSA_TAG_8021Q help Say Y or M if you want to enable support for tagging frames with a custom VLAN-based header. 
Frames that require timestamping, such as @@ -149,7 +138,7 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - select NET_DSA_TAG_8021Q + depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 44bc79952b8b..67ea009f242c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,10 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o # tagging formats -obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 84cad1be9ce4..1dc45e40f961 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = cpu_dp->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 185629f27f80..a4c525f1cb17 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. + */ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp, *dp; @@ -321,15 +324,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return -EINVAL; } - /* Assign the default CPU port to all ports of the fabric */ - list_for_each_entry(dp, &dst->ports, list) + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = cpu_dp; + } return 0; } -static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. 
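 *
 * (Illustration, not part of the patch: in a hypothetical two-switch
 *  tree where only sw0 has a CPU port,
 *
 *	sw0: cpu0, p1, p2  ->  p1->cpu_dp = p2->cpu_dp = cpu0 (local)
 *	sw1: p3, p4        ->  p3->cpu_dp = p4->cpu_dp = cpu0 (default)
 *
 *  sw1's user and DSA ports fall back to the first CPU port in the
 *  tree, as assigned by dsa_tree_setup_default_cpu().)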
+ */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + /* Prefer a local CPU port */ + if (dp->ds != cpu_dp->ds) + continue; + + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; + + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + } + + return dsa_tree_setup_default_cpu(dst); +} + +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -921,13 +957,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (!complete) return 0; - err = dsa_tree_setup_default_cpu(dst); + err = dsa_tree_setup_cpu_ports(dst); if (err) return err; err = dsa_tree_setup_switches(dst); if (err) - goto teardown_default_cpu; + goto teardown_cpu_ports; err = dsa_tree_setup_master(dst); if (err) @@ -947,8 +983,8 @@ teardown_master: dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); -teardown_default_cpu: - dsa_tree_teardown_default_cpu(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); return err; } @@ -966,7 +1002,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); - dsa_tree_teardown_default_cpu(dst); + dsa_tree_teardown_cpu_ports(dst); list_for_each_entry_safe(dl, next, &dst->rtable, list) { list_del(&dl->list); @@ -1044,6 +1080,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->ds = ds; dp->index = index; + dp->bridge_num = -1; INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); @@ -1265,6 +1302,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds, return -EEXIST; } + if (ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f201c33980bf..e43c5dc04282 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,8 @@ #include <net/dsa.h> #include <net/gro_cells.h> +#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG + enum { DSA_NOTIFIER_AGEING_TIME, DSA_NOTIFIER_BRIDGE_JOIN, @@ -39,6 +41,8 @@ enum { DSA_NOTIFIER_MRP_DEL, DSA_NOTIFIER_MRP_ADD_RING_ROLE, DSA_NOTIFIER_MRP_DEL_RING_ROLE, + DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, + DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -113,6 +117,14 @@ struct dsa_notifier_mrp_ring_role_info { int port; }; +/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ +struct dsa_notifier_tag_8021q_vlan_info { + int tree_index; + int sw_index; + int port; + u16 vid; +}; + struct dsa_switchdev_event_work { struct dsa_switch *ds; int port; @@ -194,16 +206,14 @@ void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack); -int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, - struct netlink_ext_ack *extack); +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_lag_change(struct dsa_port *dp, struct netdev_lag_lower_state_info *linfo); int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct netdev_lag_upper_info *uinfo, struct netlink_ext_ack *extack); -int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev, - struct netlink_ext_ack *extack); +void 
dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct netlink_ext_ack *extack); @@ -253,16 +263,18 @@ int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, - struct net_device *dev) + const struct net_device *dev) { return dsa_port_to_bridge_port(dp) == dev; } static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, - struct net_device *bridge_dev) + const struct net_device *bridge_dev) { /* DSA ports connected to a bridge, and event was emitted * for the bridge. @@ -272,7 +284,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, /* Returns true if any port of this tree offloads the given net_device */ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, - struct net_device *dev) + const struct net_device *dev) { struct dsa_port *dp; @@ -283,6 +295,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, return false; } +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge(dp, bridge_dev)) + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; extern struct notifier_block dsa_slave_switchdev_notifier; @@ -372,6 +397,63 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) return skb; } +/* For switches without hardware support for DSA tagging to be able + * to support termination through the bridge. + */ +static inline struct net_device * +dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct bridge_vlan_info vinfo; + struct net_device *slave; + struct dsa_port *dp; + int err; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->type != DSA_PORT_TYPE_USER) + continue; + + if (!dp->bridge_dev) + continue; + + if (dp->stp_state != BR_STATE_LEARNING && + dp->stp_state != BR_STATE_FORWARDING) + continue; + + /* Since the bridge might learn this packet, keep the CPU port + * affinity with the port that will be used for the reply on + * xmit. + */ + if (dp->cpu_dp != cpu_dp) + continue; + + slave = dp->slave; + + err = br_vlan_get_info_rcu(slave, vid, &vinfo); + if (err) + continue; + + return slave; + } + + return NULL; +} + +/* If the ingress port offloads the bridge, we mark the frame as autonomously + * forwarded by hardware, so the software bridge doesn't forward it twice, back + * to us, because we already did. However, if we're in fallback mode and we do + * software bridging, we are not offloading it, therefore the dp->bridge_dev + * pointer is not populated, and flooding needs to be done by software (we are + * effectively operating in standalone ports mode).
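 *
 * (Illustration, not part of the patch: a tagger .rcv() path would call
 *  this once skb->dev points at the slave, e.g., sketched with the
 *  existing dsa_master_find_slave() helper:
 *
 *	skb->dev = dsa_master_find_slave(dev, 0, port);
 *	if (!skb->dev)
 *		return NULL;
 *	dsa_default_offload_fwd_mark(skb);
 *	return skb;
 *
 *  so offload_fwd_mark simply tracks whether dp->bridge_dev is set.)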
+ */ +static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + + skb->offload_fwd_mark = !!(dp->bridge_dev); +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); @@ -386,6 +468,16 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); +/* tag_8021q.c */ +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info); +int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info); +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); + extern struct list_head dsa_tree_list; #endif diff --git a/net/dsa/master.c b/net/dsa/master.c index 3fc90e36772d..e8e19857621b 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -210,14 +210,14 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - if (dev->netdev_ops->ndo_do_ioctl) - err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_eth_ioctl) + err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd); return err; } static const struct dsa_netdevice_ops dsa_netdev_ops = { - .ndo_do_ioctl = dsa_master_ioctl, + .ndo_eth_ioctl = dsa_master_ioctl, }; static int dsa_master_ethtool_setup(struct net_device *dev) diff --git a/net/dsa/port.c b/net/dsa/port.c index 28b45b7e66df..b927d94b6934 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -167,8 +167,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp) } } -static int dsa_port_switchdev_sync(struct dsa_port *dp, - struct netlink_ext_ack *extack) +static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, + struct netlink_ext_ack *extack) { struct net_device *brport_dev = dsa_port_to_bridge_port(dp); struct net_device *br = dp->bridge_dev; @@ -194,59 +194,6 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = br_mdb_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - /* Forwarding and termination FDB entries on the port */ - err = br_fdb_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - /* Termination FDB entries on the bridge itself */ - err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - return 0; -} - -static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp, - struct net_device *br, - struct netlink_ext_ack *extack) -{ - struct net_device *brport_dev = dsa_port_to_bridge_port(dp); - int err; - - /* Delete the switchdev objects left on this port */ - err = br_mdb_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - /* Forwarding and termination FDB entries on the port */ - err = br_fdb_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - /* Termination FDB entries on the bridge itself */ - err = 
br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - return 0; } @@ -283,6 +230,83 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ } +static int dsa_tree_find_bridge_num(struct dsa_switch_tree *dst, + struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not-yet-valid dp->bridge_num. + * However, there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. + */ + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + int bridge_num = dp->bridge_num; + struct dsa_switch *ds = dp->ds; + + /* No bridge TX forwarding offload => do nothing */ + if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1) + return; + + dp->bridge_num = -1; + + /* Check if the bridge is still in use; otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_tree_find_bridge_num(dst, bridge_dev)) + clear_bit(bridge_num, &dst->fwd_offloading_bridges); + + /* Notify the chips only once the offload has been deactivated, so + * that they can update their configuration accordingly. + */ + ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev, + bridge_num); +} + +static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + struct dsa_switch *ds = dp->ds; + int bridge_num, err; + + if (!ds->ops->port_bridge_tx_fwd_offload) + return false; + + bridge_num = dsa_tree_find_bridge_num(dst, bridge_dev); + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dst->fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= ds->num_fwd_offloading_bridges) + return false; + + set_bit(bridge_num, &dst->fwd_offloading_bridges); + } + + dp->bridge_num = bridge_num; + + /* Notify the driver */ + err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev, + bridge_num); + if (err) { + dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev); + return false; + } + + return true; +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack) { @@ -292,6 +316,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, .port = dp->index, .br = br, }; + struct net_device *dev = dp->slave; + struct net_device *brport_dev; + bool tx_fwd_offload; int err; /* Here the interface is already bridged.
Reflect the current @@ -299,16 +326,31 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, */ dp->bridge_dev = br; + brport_dev = dsa_port_to_bridge_port(dp); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info); if (err) goto out_rollback; - err = dsa_port_switchdev_sync(dp, extack); + tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br); + + err = switchdev_bridge_port_offload(brport_dev, dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier, + tx_fwd_offload, extack); if (err) goto out_rollback_unbridge; + err = dsa_port_switchdev_sync_attrs(dp, extack); + if (err) + goto out_rollback_unoffload; + return 0; +out_rollback_unoffload: + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); out_rollback: @@ -316,10 +358,13 @@ out_rollback: return err; } -int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, - struct netlink_ext_ack *extack) +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) { - return dsa_port_switchdev_unsync_objs(dp, br, extack); + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); } void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) @@ -337,6 +382,8 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) */ dp->bridge_dev = NULL; + dsa_port_bridge_tx_fwd_unoffload(dp, br); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); @@ -409,13 +456,10 @@ err_lag_join: return err; } -int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag, - struct netlink_ext_ack *extack) +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag) { if (dp->bridge_dev) - return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack); - - return 0; + dsa_port_pre_bridge_leave(dp, dp->bridge_dev); } void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) @@ -844,7 +888,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops) { - cpu_dp->filter = tag_ops->filter; cpu_dp->rcv = tag_ops->rcv; cpu_dp->tag_ops = tag_ops; } @@ -1217,3 +1260,31 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n"); } + +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid) +{ + struct dsa_notifier_tag_8021q_vlan_info info = { + .tree_index = dp->ds->dst->index, + .sw_index = dp->ds->index, + .port = dp->index, + .vid = vid, + }; + + return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); +} + +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) +{ + struct dsa_notifier_tag_8021q_vlan_info info = { + .tree_index = dp->ds->dst->index, + .sw_index = dp->ds->index, + .port = dp->index, + .vid = vid, + }; + int err; + + err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + if (err) + pr_err("DSA: failed to notify tag_8021q VLAN deletion: %pe\n", + ERR_PTR(err)); +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 532085da8d8f..6e1135d3ee33 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1687,7 +1687,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_set_rx_mode 
= dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_fdb_dump = dsa_slave_fdb_dump, - .ndo_do_ioctl = dsa_slave_ioctl, + .ndo_eth_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = dsa_slave_netpoll_setup, @@ -2056,20 +2056,16 @@ static int dsa_slave_prechangeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct netlink_ext_ack *extack; - int err = 0; - - extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_bridge_master(info->upper_dev) && !info->linking) - err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack); + dsa_port_pre_bridge_leave(dp, info->upper_dev); else if (netif_is_lag_master(info->upper_dev) && !info->linking) - err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack); + dsa_port_pre_lag_leave(dp, info->upper_dev); /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be * meaningfully enslaved to a bridge yet */ - return notifier_from_errno(err); + return NOTIFY_DONE; } static int @@ -2357,26 +2353,98 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) kfree(switchdev_work); } -static int dsa_lower_dev_walk(struct net_device *lower_dev, - struct netdev_nested_priv *priv) +static bool dsa_foreign_dev_check(const struct net_device *dev, + const struct net_device *foreign_dev) { - if (dsa_slave_dev_check(lower_dev)) { - priv->data = (void *)netdev_priv(lower_dev); - return 1; - } + const struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch_tree *dst = dp->ds->dst; - return 0; + if (netif_is_bridge_master(foreign_dev)) + return !dsa_tree_offloads_bridge(dst, foreign_dev); + + if (netif_is_bridge_port(foreign_dev)) + return !dsa_tree_offloads_bridge_port(dst, foreign_dev); + + /* Everything else is foreign */ + return true; } -static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev) +static int dsa_slave_fdb_event(struct net_device *dev, + const struct net_device *orig_dev, + const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info, + unsigned long event) { - struct netdev_nested_priv priv = { - .data = NULL, - }; + struct dsa_switchdev_event_work *switchdev_work; + struct dsa_port *dp = dsa_slave_to_port(dev); + bool host_addr = fdb_info->is_local; + struct dsa_switch *ds = dp->ds; - netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv); + if (ctx && ctx != dp) + return 0; + + if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) + return -EOPNOTSUPP; + + if (dsa_slave_dev_check(orig_dev) && + switchdev_fdb_is_dynamically_learned(fdb_info)) + return 0; + + /* FDB entries learned by the software bridge should be installed as + * host addresses only if the driver requests assisted learning. + */ + if (switchdev_fdb_is_dynamically_learned(fdb_info) && + !ds->assisted_learning_on_cpu_port) + return 0; + + /* Also treat FDB entries on foreign interfaces bridged with us as host + * addresses. + */ + if (dsa_foreign_dev_check(dev, orig_dev)) + host_addr = true; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return -ENOMEM; - return (struct dsa_slave_priv *)priv.data; + netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", + event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", + orig_dev->name, fdb_info->addr, fdb_info->vid, + host_addr ? 
" as host address" : ""); + + INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); + switchdev_work->ds = ds; + switchdev_work->port = dp->index; + switchdev_work->event = event; + switchdev_work->dev = dev; + + ether_addr_copy(switchdev_work->addr, fdb_info->addr); + switchdev_work->vid = fdb_info->vid; + switchdev_work->host_addr = host_addr; + + /* Hold a reference for dsa_fdb_offload_notify */ + dev_hold(dev); + dsa_schedule_work(&switchdev_work->work); + + return 0; +} + +static int +dsa_slave_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, + SWITCHDEV_FDB_ADD_TO_DEVICE); +} + +static int +dsa_slave_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, + SWITCHDEV_FDB_DEL_TO_DEVICE); } /* Called under rcu_read_lock() */ @@ -2384,10 +2452,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); - const struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_switchdev_event_work *switchdev_work; - bool host_addr = false; - struct dsa_port *dp; int err; switch (event) { @@ -2397,92 +2461,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, dsa_slave_port_attr_set); return notifier_from_errno(err); case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = switchdev_handle_fdb_add_to_device(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_fdb_add_to_device, + NULL); + return notifier_from_errno(err); case SWITCHDEV_FDB_DEL_TO_DEVICE: - fdb_info = ptr; - - if (dsa_slave_dev_check(dev)) { - dp = dsa_slave_to_port(dev); - - if (fdb_info->is_local) - host_addr = true; - else if (!fdb_info->added_by_user) - return NOTIFY_OK; - } else { - /* Snoop addresses added to foreign interfaces - * bridged with us, or the bridge - * itself. Dynamically learned addresses can - * also be added for switches that don't - * automatically learn SA from CPU-injected - * traffic. - */ - struct net_device *br_dev; - struct dsa_slave_priv *p; - - if (netif_is_bridge_master(dev)) - br_dev = dev; - else - br_dev = netdev_master_upper_dev_get_rcu(dev); - - if (!br_dev) - return NOTIFY_DONE; - - if (!netif_is_bridge_master(br_dev)) - return NOTIFY_DONE; - - p = dsa_slave_dev_lower_find(br_dev); - if (!p) - return NOTIFY_DONE; - - dp = p->dp; - host_addr = fdb_info->is_local; - - /* FDB entries learned by the software bridge should - * be installed as host addresses only if the driver - * requests assisted learning. - * On the other hand, FDB entries for local termination - * should always be installed. - */ - if (!fdb_info->added_by_user && !fdb_info->is_local && - !dp->ds->assisted_learning_on_cpu_port) - return NOTIFY_DONE; - - /* When the bridge learns an address on an offloaded - * LAG we don't want to send traffic to the CPU, the - * other ports bridged with the LAG should be able to - * autonomously forward towards it. - * On the other hand, if the address is local - * (therefore not learned) then we want to trap it to - * the CPU regardless of whether the interface it - * belongs to is offloaded or not. 
- */ - if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) && - !fdb_info->is_local) - return NOTIFY_DONE; - } - - if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) - return NOTIFY_DONE; - - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; - - INIT_WORK(&switchdev_work->work, - dsa_slave_switchdev_event_work); - switchdev_work->ds = dp->ds; - switchdev_work->port = dp->index; - switchdev_work->event = event; - switchdev_work->dev = dev; - - ether_addr_copy(switchdev_work->addr, - fdb_info->addr); - switchdev_work->vid = fdb_info->vid; - switchdev_work->host_addr = host_addr; - - /* Hold a reference for dsa_fdb_offload_notify */ - dev_hold(dev); - dsa_schedule_work(&switchdev_work->work); - break; + err = switchdev_handle_fdb_del_to_device(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_fdb_del_to_device, + NULL); + return notifier_from_errno(err); default: return NOTIFY_DONE; } diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 5ece05dfd8f2..fd1a1c6bf9cf 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -90,18 +90,25 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { struct dsa_switch_tree *dst = ds->dst; + int err; if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_join) - return ds->ops->port_bridge_join(ds, info->port, info->br); + ds->ops->port_bridge_join) { + err = ds->ops->port_bridge_join(ds, info->port, info->br); + if (err) + return err; + } if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_join) - return ds->ops->crosschip_bridge_join(ds, info->tree_index, - info->sw_index, - info->port, info->br); + ds->ops->crosschip_bridge_join) { + err = ds->ops->crosschip_bridge_join(ds, info->tree_index, + info->sw_index, + info->port, info->br); + if (err) + return err; + } - return 0; + return dsa_tag_8021q_bridge_join(ds, info); } static int dsa_switch_bridge_leave(struct dsa_switch *ds, @@ -151,7 +158,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (err && err != EOPNOTSUPP) return err; } - return 0; + + return dsa_tag_8021q_bridge_leave(ds, info); } /* Matches for all upstream-facing ports (the CPU port and all upstream-facing @@ -726,6 +734,12 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_MRP_DEL_RING_ROLE: err = dsa_switch_mrp_del_ring_role(ds, info); break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD: + err = dsa_switch_tag_8021q_vlan_add(ds, info); + break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: + err = dsa_switch_tag_8021q_vlan_del(ds, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 4aa29f90ecea..654697ebb6f3 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -17,7 +17,7 @@ * * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * +-----------+-----+-----------------+-----------+-----------------------+ - * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT | + * | DIR | VBID| SWITCH_ID | VBID | PORT | * +-----------+-----+-----------------+-----------+-----------------------+ * * DIR - VID[11:10]: @@ -27,24 +27,14 @@ * These values make the special VIDs of 0, 1 and 4095 to be left * unused by this coding scheme. * - * SVL/SUBVLAN - { VID[9], VID[5:4] }: - * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN. 
- * * 0 (0b000): Field does not encode a sub-VLAN, either because - * received traffic is untagged, PVID-tagged or because a second - * VLAN tag is present after this tag and not inside of it. - * * 1 (0b001): Received traffic is tagged with a VID value private - * to the host. This field encodes the index in the host's lookup - * table through which the value of the ingress VLAN ID can be - * recovered. - * * 2 (0b010): Field encodes a sub-VLAN. - * ... - * * 7 (0b111): Field encodes a sub-VLAN. - * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero - * (by the host) and ignored on receive (by the switch). - * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and 7. * + * VBID - { VID[9], VID[5:4] }: + * Virtual bridge ID. If between 1 and 7, packet targets the broadcast + * domain of a bridge. If transmitted as zero, packet targets a single + * port. Field only valid on transmit, must be ignored on receive. + * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and 15. */ @@ -61,23 +51,30 @@ #define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \ DSA_8021Q_SWITCH_ID_MASK) -#define DSA_8021Q_SUBVLAN_HI_SHIFT 9 -#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9) -#define DSA_8021Q_SUBVLAN_LO_SHIFT 4 -#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4) -#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2) -#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0)) -#define DSA_8021Q_SUBVLAN(x) \ - (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \ - DSA_8021Q_SUBVLAN_LO_MASK) | \ - ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \ - DSA_8021Q_SUBVLAN_HI_MASK)) +#define DSA_8021Q_VBID_HI_SHIFT 9 +#define DSA_8021Q_VBID_HI_MASK GENMASK(9, 9) +#define DSA_8021Q_VBID_LO_SHIFT 4 +#define DSA_8021Q_VBID_LO_MASK GENMASK(5, 4) +#define DSA_8021Q_VBID_HI(x) (((x) & GENMASK(2, 2)) >> 2) +#define DSA_8021Q_VBID_LO(x) ((x) & GENMASK(1, 0)) +#define DSA_8021Q_VBID(x) \ + (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \ + DSA_8021Q_VBID_LO_MASK) | \ + ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \ + DSA_8021Q_VBID_HI_MASK)) #define DSA_8021Q_PORT_SHIFT 0 #define DSA_8021Q_PORT_MASK GENMASK(3, 0) #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ DSA_8021Q_PORT_MASK) +u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num) +{ + /* The VBID value of 0 is reserved for precise TX */ + return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num + 1); +} +EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); + /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ @@ -98,13 +95,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); -u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan) -{ - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan); -} -EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan); - /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) { @@ -119,20 +109,6 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); -/* Returns the decoded subvlan from the RX VID. 
*/ -u16 dsa_8021q_rx_subvlan(u16 vid) -{ - u16 svl_hi, svl_lo; - - svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >> - DSA_8021Q_SUBVLAN_HI_SHIFT; - svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >> - DSA_8021Q_SUBVLAN_LO_SHIFT; - - return (svl_hi << 2) | svl_lo; -} -EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan); - bool vid_is_dsa_8021q_rxvlan(u16 vid) { return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX; @@ -151,21 +127,152 @@ bool vid_is_dsa_8021q(u16 vid) } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); -/* If @enabled is true, installs @vid with @flags into the switch port's HW - * filter. - * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the - * user explicitly configured this @vid through the bridge core, then the @vid - * is installed again, but this time with the flags from the bridge layer. - */ -static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, - u16 flags, bool enabled) +static struct dsa_tag_8021q_vlan * +dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) { - struct dsa_port *dp = dsa_to_port(ctx->ds, port); + struct dsa_tag_8021q_vlan *v; - if (enabled) - return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags); + list_for_each_entry(v, &ctx->vlans, list) + if (v->vid == vid && v->port == port) + return v; - return ctx->ops->vlan_del(ctx->ds, dp->index, vid); + return NULL; +} + +static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, + u16 vid, u16 flags) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_tag_8021q_vlan *v; + int err; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); + + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (v) { + refcount_inc(&v->refcount); + return 0; + } + + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; + + err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); + if (err) { + kfree(v); + return err; + } + + v->vid = vid; + v->port = port; + refcount_set(&v->refcount, 1); + list_add_tail(&v->list, &ctx->vlans); + + return 0; +} + +static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, + u16 vid) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_tag_8021q_vlan *v; + int err; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_del(ds, port, vid); + + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (!v) + return -ENOENT; + + if (!refcount_dec_and_test(&v->refcount)) + return 0; + + err = ds->ops->tag_8021q_vlan_del(ds, port, vid); + if (err) { + refcount_inc(&v->refcount); + return err; + } + + list_del(&v->list); + kfree(v); + + return 0; +} + +static bool +dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + return true; + + if (ds->dst->index == info->tree_index && ds->index == info->sw_index) + return port == info->port; + + return false; +} + +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + int port, err; + + /* Since we use dsa_broadcast(), there might be other switches in other + * trees which don't support tag_8021q, so don't return an error. 
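The dsa_switch_do_tag_8021q_vlan_add()/..._del() pair a few hunks above refcounts (port, vid) tuples because several user ports install the same VID on a shared CPU or DSA port: the hardware entry may only be programmed on the 0 -> 1 transition and removed on the 1 -> 0 transition. A runnable user-space model of that invariant (hypothetical names, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

struct vlan_ref {
	int port, vid, refcount;
	struct vlan_ref *next;
};

static struct vlan_ref *vlans;

/* Program the "hardware" only on the 0 -> 1 transition. */
static int vlan_ref_get(int port, int vid)
{
	struct vlan_ref *v;

	for (v = vlans; v; v = v->next) {
		if (v->port == port && v->vid == vid) {
			v->refcount++;
			return 0;
		}
	}

	v = calloc(1, sizeof(*v));
	if (!v)
		return -1;

	v->port = port;
	v->vid = vid;
	v->refcount = 1;
	v->next = vlans;
	vlans = v;
	printf("hw: install VID %d on port %d\n", vid, port);
	return 0;
}

/* Unprogram the "hardware" only on the 1 -> 0 transition. */
static void vlan_ref_put(int port, int vid)
{
	struct vlan_ref **pv, *v;

	for (pv = &vlans; (v = *pv); pv = &v->next) {
		if (v->port != port || v->vid != vid)
			continue;
		if (--v->refcount == 0) {
			*pv = v->next;
			printf("hw: remove VID %d on port %d\n", vid, port);
			free(v);
		}
		return;
	}
}

int main(void)
{
	vlan_ref_get(8, 3072);	/* first user port needs VID 3072 on CPU port 8 */
	vlan_ref_get(8, 3072);	/* second user port needs the same VID */
	vlan_ref_put(8, 3072);	/* still referenced, hardware untouched */
	vlan_ref_put(8, 3072);	/* last user gone, hardware entry removed */
	return 0;
}

The kernel version keeps the same bookkeeping with a refcount_t and a list_head hanging off struct dsa_8021q_context, but the invariant is identical.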
+ * Or they might even support tag_8021q but have not registered yet to + * use it (maybe they use another tagger currently). + */ + if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) + return 0; + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + u16 flags = 0; + + if (dsa_is_user_port(ds, port)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_is_dsa_8021q_rxvlan(info->vid) && + dsa_8021q_rx_switch_id(info->vid) == ds->index && + dsa_8021q_rx_source_port(info->vid) == port) + flags |= BRIDGE_VLAN_INFO_PVID; + + err = dsa_switch_do_tag_8021q_vlan_add(ds, port, + info->vid, + flags); + if (err) + return err; + } + } + + return 0; +} + +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + int port, err; + + if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) + return 0; + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + err = dsa_switch_do_tag_8021q_vlan_del(ds, port, + info->vid); + if (err) + return err; + } + } + + return 0; } /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single @@ -181,12 +288,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * force all switched traffic to pass through the CPU. So we must also make * the other front-panel ports members of this VID we're adding, albeit * we're not making it their PVID (they'll still have their own). - * By the way - just because we're installing the same VID in multiple - * switch ports doesn't mean that they'll start to talk to one another, even - * while not bridged: the final forwarding decision is still an AND between - * the L2 forwarding information (which is limiting forwarding in this case) - * and the VLAN-based restrictions (of which there are none in this case, - * since all ports are members). * - On TX (ingress from CPU and towards network) we are faced with a problem. * If we were to tag traffic (from within DSA) with the port's pvid, all * would be well, assuming the switch ports were standalone. Frames would @@ -200,9 +301,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * a member of the VID we're tagging the traffic with - the desired one. * * So at the end, each front-panel port will have one RX VID (also the PVID), - * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU - * port will have the RX and TX VIDs of all front-panel ports, and on top of - * that, is also tagged-input and tagged-output (VLAN trunk). + * the RX VID of all other front-panel ports that are in the same bridge, and + * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all + * front-panel ports, and on top of that, is also tagged-input and + * tagged-output (VLAN trunk). 
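In dsa_switch_tag_8021q_vlan_add() just above, the bridge VLAN flags follow purely from what the VID encodes. A compact restatement (sketch; the flag values mirror BRIDGE_VLAN_INFO_UNTAGGED and BRIDGE_VLAN_INFO_PVID and are written out here only for illustration):

#include <stdbool.h>
#include <stdint.h>

#define VLAN_FLAG_PVID		(1 << 1)	/* mirrors BRIDGE_VLAN_INFO_PVID */
#define VLAN_FLAG_UNTAGGED	(1 << 2)	/* mirrors BRIDGE_VLAN_INFO_UNTAGGED */

static uint16_t tag_8021q_vlan_flags(bool user_port, bool vid_is_own_rx_vid)
{
	uint16_t flags = 0;

	/* User ports must pop the tag_8021q header on egress. */
	if (user_port)
		flags |= VLAN_FLAG_UNTAGGED;

	/* A port's own RX VID doubles as its port-based default VLAN. */
	if (vid_is_own_rx_vid)
		flags |= VLAN_FLAG_PVID;

	return flags;
}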
* * CPU port CPU port * +-------------+-----+-------------+ +-------------+-----+-------------+ @@ -220,246 +322,245 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, - bool enabled) +static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + + /* Don't match on self */ + if (ds->dst->index == info->tree_index && + ds->index == info->sw_index && + port == info->port) + return false; + + if (dsa_port_is_user(dp)) + return dp->bridge_dev == info->br; + + return false; +} + +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_switch *targeted_ds; + struct dsa_port *targeted_dp; + u16 targeted_rx_vid; + int err, port; + + if (!ds->tag_8021q_ctx) + return 0; + + targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); + targeted_dp = dsa_to_port(targeted_ds, info->port); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + + if (!dsa_tag_8021q_bridge_match(ds, port, info)) + continue; + + /* Install the RX VID of the targeted port in our VLAN table */ + err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid); + if (err) + return err; + + /* Install our RX VID into the targeted port's VLAN table */ + err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid); + if (err) + return err; + } + + return 0; +} + +int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) { - int upstream = dsa_upstream_port(ctx->ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port); + struct dsa_switch *targeted_ds; + struct dsa_port *targeted_dp; + u16 targeted_rx_vid; + int port; + + if (!ds->tag_8021q_ctx) + return 0; + + targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); + targeted_dp = dsa_to_port(targeted_ds, info->port); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + + if (!dsa_tag_8021q_bridge_match(ds, port, info)) + continue; + + /* Remove the RX VID of the targeted port from our VLAN table */ + dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid); + + /* Remove our RX VID from the targeted port's VLAN table */ + dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid); + } + + return 0; +} + +int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + + return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload); + +void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + + dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload); + +/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */ +static int 
dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); struct net_device *master; - int i, err, subvlan; + int err; /* The CPU port is implicitly configured by * configuring the front-panel ports */ - if (!dsa_is_user_port(ctx->ds, port)) + if (!dsa_port_is_user(dp)) return 0; - master = dsa_to_port(ctx->ds, port)->cpu_dp->master; + master = dp->cpu_dp->master; /* Add this user port's RX VID to the membership list of all others * (including itself). This is so that bridging will not be hindered. * L2 forwarding rules still take precedence when there are no VLAN * restrictions, so there are no concerns about leaking traffic. */ - for (i = 0; i < ctx->ds->num_ports; i++) { - u16 flags; - - if (i == upstream) - continue; - else if (i == port) - /* The RX VID is pvid on this port */ - flags = BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_PVID; - else - /* The RX VID is a regular VLAN on all others */ - flags = BRIDGE_VLAN_INFO_UNTAGGED; - - err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled); - if (err) { - dev_err(ctx->ds->dev, - "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); - return err; - } - } - - /* CPU port needs to see this port's RX VID - * as tagged egress. - */ - err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled); + err = dsa_port_tag_8021q_vlan_add(dp, rx_vid); if (err) { - dev_err(ctx->ds->dev, - "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); + dev_err(ds->dev, + "Failed to apply RX VID %d to port %d: %pe\n", + rx_vid, port, ERR_PTR(err)); return err; } - /* Add to the master's RX filter not only @rx_vid, but in fact - * the entire subvlan range, just in case this DSA switch might - * want to use sub-VLANs. - */ - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) { - u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan); - - if (enabled) - vlan_vid_add(master, ctx->proto, vid); - else - vlan_vid_del(master, ctx->proto, vid); - } + /* Add @rx_vid to the master's RX filter. 
*/ + vlan_vid_add(master, ctx->proto, rx_vid); /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, - enabled); - if (err) { - dev_err(ctx->ds->dev, - "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, port, err); - return err; - } - err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled); + err = dsa_port_tag_8021q_vlan_add(dp, tx_vid); if (err) { - dev_err(ctx->ds->dev, - "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, upstream, err); + dev_err(ds->dev, + "Failed to apply TX VID %d on port %d: %pe\n", + tx_vid, port, ERR_PTR(err)); return err; } return err; } -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled) +static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { - int rc, port; + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); + struct net_device *master; - ASSERT_RTNL(); + /* The CPU port is implicitly configured by + * configuring the front-panel ports + */ + if (!dsa_port_is_user(dp)) + return; - for (port = 0; port < ctx->ds->num_ports; port++) { - rc = dsa_8021q_setup_port(ctx, port, enabled); - if (rc < 0) { - dev_err(ctx->ds->dev, - "Failed to setup VLAN tagging for port %d: %d\n", - port, rc); - return rc; - } - } + master = dp->cpu_dp->master; - return 0; -} -EXPORT_SYMBOL_GPL(dsa_8021q_setup); + dsa_port_tag_8021q_vlan_del(dp, rx_vid); -static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx, - int port, - struct dsa_8021q_context *other_ctx, - int other_port, bool enabled) -{ - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); + vlan_vid_del(master, ctx->proto, rx_vid); - /* @rx_vid of local @ds port @port goes to @other_port of - * @other_ds - */ - return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid, - BRIDGE_VLAN_INFO_UNTAGGED, enabled); + dsa_port_tag_8021q_vlan_del(dp, tx_vid); } -static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +static int dsa_tag_8021q_setup(struct dsa_switch *ds) { - struct dsa_8021q_crosschip_link *c; + int err, port; + + ASSERT_RTNL(); - list_for_each_entry(c, &ctx->crosschip_links, list) { - if (c->port == port && c->other_ctx == other_ctx && - c->other_port == other_port) { - refcount_inc(&c->refcount); - return 0; + for (port = 0; port < ds->num_ports; port++) { + err = dsa_tag_8021q_port_setup(ds, port); + if (err < 0) { + dev_err(ds->dev, + "Failed to setup VLAN tagging for port %d: %pe\n", + port, ERR_PTR(err)); + return err; } } - dev_dbg(ctx->ds->dev, - "adding crosschip link from port %d to %s port %d\n", - port, dev_name(other_ctx->ds->dev), other_port); - - c = kzalloc(sizeof(*c), GFP_KERNEL); - if (!c) - return -ENOMEM; - - c->port = port; - c->other_ctx = other_ctx; - c->other_port = other_port; - refcount_set(&c->refcount, 1); - - list_add(&c->list, &ctx->crosschip_links); - return 0; } -static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx, - struct dsa_8021q_crosschip_link *c, - bool *keep) +static void dsa_tag_8021q_teardown(struct dsa_switch *ds) { - *keep = !refcount_dec_and_test(&c->refcount); + int port; - if (*keep) - return; - - dev_dbg(ctx->ds->dev, - "deleting crosschip link from port %d to %s port %d\n", - c->port, dev_name(c->other_ctx->ds->dev), c->other_port); + ASSERT_RTNL(); - list_del(&c->list); - kfree(c); + for (port = 0; port 
< ds->num_ports; port++) + dsa_tag_8021q_port_teardown(ds, port); } -/* Make traffic from local port @port be received by remote port @other_port. - * This means that our @rx_vid needs to be installed on @other_ds's upstream - * and user ports. The user ports should be egress-untagged so that they can - * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged - * or untagged: it doesn't matter, since it should never egress a frame having - * our @rx_vid. - */ -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) { - /* @other_upstream is how @other_ds reaches us. If we are part - * of disjoint trees, then we are probably connected through - * our CPU ports. If we're part of the same tree though, we should - * probably use dsa_towards_port. - */ - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); - int rc; + struct dsa_8021q_context *ctx; - rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port); - if (rc) - return rc; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; - rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, - other_port, true); - if (rc) - return rc; + ctx->proto = proto; + ctx->ds = ds; - rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream); - if (rc) - return rc; + INIT_LIST_HEAD(&ctx->vlans); - return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, - other_upstream, true); + ds->tag_8021q_ctx = ctx; + + return dsa_tag_8021q_setup(ds); } -EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_register); -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +void dsa_tag_8021q_unregister(struct dsa_switch *ds) { - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); - struct dsa_8021q_crosschip_link *c, *n; - - list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) { - if (c->port == port && c->other_ctx == other_ctx && - (c->other_port == other_port || - c->other_port == other_upstream)) { - struct dsa_8021q_context *other_ctx = c->other_ctx; - int other_port = c->other_port; - bool keep; - int rc; - - dsa_8021q_crosschip_link_del(ctx, c, &keep); - if (keep) - continue; - - rc = dsa_8021q_crosschip_link_apply(ctx, port, - other_ctx, - other_port, - false); - if (rc) - return rc; - } + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_tag_8021q_vlan *v, *n; + + dsa_tag_8021q_teardown(ds); + + list_for_each_entry_safe(v, n, &ctx->vlans, list) { + list_del(&v->list); + kfree(v); } - return 0; + ds->tag_8021q_ctx = NULL; + + kfree(ctx); } -EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci) @@ -471,8 +572,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *subvlan) +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id) { u16 vid, tci; @@ -489,9 +589,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, *source_port = dsa_8021q_rx_source_port(vid); *switch_id = dsa_8021q_rx_switch_id(vid); - *subvlan = dsa_8021q_rx_subvlan(vid); skb->priority = (tci & 
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } EXPORT_SYMBOL_GPL(dsa_8021q_rcv); - -MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 0efae1a372b3..8a02ac44282f 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, } static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, - struct net_device *ndev, - struct packet_type *pt) + struct net_device *ndev) { u8 ver, port; u16 hdr; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 0750af951fc9..96e93b544a0d 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, */ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, unsigned int offset) { int source_port; @@ -167,7 +166,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); return skb; } @@ -182,13 +181,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; /* skb->data points to the EtherType, the tag is right before it */ - nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + nskb = brcm_tag_rcv_ll(skb, dev, 2); if (!nskb) return nskb; @@ -251,8 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, } static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int source_port; u8 *brcm_tag; @@ -271,7 +268,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); /* Move the Ethernet DA and SA */ memmove(skb->data - ETH_HLEN, @@ -302,11 +299,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, } static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* tag is prepended to the packet */ - return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); + return brcm_tag_rcv_ll(skb, dev, ETH_HLEN); } static const struct dsa_device_ops brcm_prepend_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index a822355afc90..e32f8160e895 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -126,7 +126,42 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 tag_dev, tag_port; + enum dsa_cmd cmd; u8 *dsa_header; + u16 pvid = 0; + int err; + + if (skb->offload_fwd_mark) { + struct dsa_switch_tree *dst = dp->ds->dst; + struct net_device *br = dp->bridge_dev; + + cmd = DSA_CMD_FORWARD; + + /* When offloading forwarding for a bridge, inject FORWARD + * packets on behalf of a virtual switch device with an index + * past the physical switches. + */ + tag_dev = dst->last_switch + 1 + dp->bridge_num; + tag_port = 0; + + /* If we are offloading forwarding for a VLAN-unaware bridge, + * inject packets to hardware using the bridge's pvid, since + * that's where the packets ingressed from. 
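The tag built further down in dsa_xmit_ll() is four bytes: command and device in byte 0, port in byte 1 and, for untagged FORWARD injection, the bridge pvid in bytes 2-3. A stand-alone encoder matching the byte layout of the hunks below (the FORWARD opcode value of 3 is an assumption here, taken from the Marvell DSA tag format rather than shown in this diff):

#include <stdint.h>

#define DSA_CMD_FORWARD_SKETCH	3	/* assumed opcode, for illustration */

/* Pack an untagged DSA header: cmd in bits 7:6 of byte 0, device in
 * bits 4:0, port in bits 7:3 of byte 1, pvid split over bytes 2-3,
 * mirroring the assignments in the hunks below.
 */
static void dsa_header_pack(uint8_t hdr[4], uint8_t cmd, uint8_t tag_dev,
			    uint8_t tag_port, uint16_t pvid)
{
	hdr[0] = (cmd << 6) | (tag_dev & 0x1f);
	hdr[1] = (tag_port & 0x1f) << 3;
	hdr[2] = pvid >> 8;
	hdr[3] = pvid & 0xff;
}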
+ */ + if (!br_vlan_enabled(br)) { + /* Safe because __dev_queue_xmit() runs under + * rcu_read_lock_bh() + */ + err = br_vlan_get_pvid_rcu(br, &pvid); + if (err) + return NULL; + } + } else { + cmd = DSA_CMD_FROM_CPU; + tag_dev = dp->ds->index; + tag_port = dp->index; + } if (skb->protocol == htons(ETH_P_8021Q)) { if (extra) { @@ -134,10 +169,10 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, memmove(skb->data, skb->data + extra, 2 * ETH_ALEN); } - /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */ + /* Construct tagged DSA tag from 802.1Q tag. */ dsa_header = skb->data + 2 * ETH_ALEN + extra; - dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index; - dsa_header[1] = dp->index << 3; + dsa_header[0] = (cmd << 6) | 0x20 | tag_dev; + dsa_header[1] = tag_port << 3; /* Move CFI field from byte 2 to byte 1. */ if (dsa_header[2] & 0x10) { @@ -148,12 +183,13 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, skb_push(skb, DSA_HLEN + extra); memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN); - /* Construct untagged FROM_CPU DSA tag. */ + /* Construct untagged DSA tag. */ dsa_header = skb->data + 2 * ETH_ALEN + extra; - dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index; - dsa_header[1] = dp->index << 3; - dsa_header[2] = 0x00; - dsa_header[3] = 0x00; + + dsa_header[0] = (cmd << 6) | tag_dev; + dsa_header[1] = tag_port << 3; + dsa_header[2] = pvid >> 8; + dsa_header[3] = pvid & 0xff; } return skb; @@ -162,8 +198,8 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { + bool trap = false, trunk = false; int source_device, source_port; - bool trunk = false; enum dsa_code code; enum dsa_cmd cmd; u8 *dsa_header; @@ -174,8 +210,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, cmd = dsa_header[0] >> 6; switch (cmd) { case DSA_CMD_FORWARD: - skb->offload_fwd_mark = 1; - trunk = !!(dsa_header[1] & 7); break; @@ -194,7 +228,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, * device (like a bridge) that forwarding has * already been done by hardware. */ - skb->offload_fwd_mark = 1; break; case DSA_CODE_MGMT_TRAP: case DSA_CODE_IGMP_MLD_TRAP: @@ -202,6 +235,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, /* Traps have, by definition, not been * forwarded by hardware, so don't mark them. */ + trap = true; break; default: /* Reserved code, this could be anything. Drop @@ -235,6 +269,15 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; + /* When using LAG offload, skb->dev is not a DSA slave interface, + * so we cannot call dsa_default_offload_fwd_mark and we need to + * special-case it. + */ + if (trunk) + skb->offload_fwd_mark = true; + else if (!trap) + dsa_default_offload_fwd_mark(skb); + /* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q * tag, and delete the ethertype (extra) if applicable. 
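On receive, the fields that dsa_rcv_ll() above pulls out of the first two header bytes can be parsed in isolation. A sketch using the same bit positions (again, FORWARD == 3 is an assumed opcode value):

#include <stdbool.h>
#include <stdint.h>

struct dsa_rx_info {
	uint8_t cmd;		/* header byte 0, bits 7:6 */
	uint8_t source_device;	/* header byte 0, bits 4:0 */
	uint8_t source_port;	/* header byte 1, bits 7:3 */
	bool trunk;		/* FORWARD only: byte 1, bits 2:0 != 0 */
};

static void dsa_header_parse(const uint8_t hdr[4], struct dsa_rx_info *info)
{
	info->cmd = hdr[0] >> 6;
	info->source_device = hdr[0] & 0x1f;
	info->source_port = (hdr[1] >> 3) & 0x1f;
	info->trunk = (info->cmd == 3 /* assumed FORWARD */) && (hdr[1] & 7);
}

A non-zero trunk field in a FORWARD tag means skb->dev resolves to a LAG netdevice rather than a DSA slave, which is why the code above has to set offload_fwd_mark directly in that one case.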
If the * 'tagged' bit is cleared; delete the DSA tag, and ethertype @@ -289,8 +332,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) return dsa_xmit_ll(skb, dev, 0); } -static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) return NULL; @@ -330,8 +372,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) return NULL; diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 5985dab06ab8..df7140984da3 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, } static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int port; u8 *gswip_tag; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 424130f85f59..f64b805303cd 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, } static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; @@ -44,7 +43,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); - skb->offload_fwd_mark = true; + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index a201ccf2435d..fa1d60d13ad9 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -24,7 +24,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); - skb->offload_fwd_mark = true; + dsa_default_offload_fwd_mark(skb); return skb; } @@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; @@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 26207ef39ebc..58d3a0e712d2 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -74,8 +74,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) { __be16 *lan9303_tag; u16 lan9303_tag1; @@ -115,7 +114,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull_rcsum(skb, 2 + 2); memmove(skb->data - ETH_HLEN, 
skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); - skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU); + if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU)) + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index cc3ba864ad5b..bbf37c031d44 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u16 hdr; int port; @@ -92,7 +91,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 190f4bfd3bef..d37ab98e7fe1 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { u64 src_port, qos_class; u64 vlan_tci, tag_type; @@ -104,7 +103,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, */ return NULL; - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); skb->priority = qos_class; /* Ocelot switches copy frames unmodified to the CPU. However, it is diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 85ac85c3af8c..3038a257ba05 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -38,18 +38,17 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { - int src_port, switch_id, subvlan; + int src_port, switch_id; - dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan); + dsa_8021q_rcv(skb, &src_port, &switch_id); skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); if (!skb->dev) return NULL; - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 693bda013065..6e3136990491 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u8 ver; u16 hdr; diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 57c46b4ab2b3..aaddca3c0245 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, } static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { u16 protport; __be16 *p; @@ -114,7 +113,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, skb->data - ETH_HLEN - RTL4_A_HDR_LEN, 2 * ETH_ALEN); - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 9c2df9ece01b..38b2792f971d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -115,40 +115,6 @@ static inline bool 
sja1105_is_meta_frame(const struct sk_buff *skb) return true; } -static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb) -{ - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - u16 vlan_tci; - - if (hdr->h_vlan_proto == htons(ETH_P_SJA1105)) - return true; - - if (hdr->h_vlan_proto != htons(ETH_P_8021Q) && - !skb_vlan_tag_present(skb)) - return false; - - if (skb_vlan_tag_present(skb)) - vlan_tci = skb_vlan_tag_get(skb); - else - vlan_tci = ntohs(hdr->h_vlan_TCI); - - return vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK); -} - -/* This is the first time the tagger sees the frame on RX. - * Figure out if we can decode it. - */ -static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) -{ - if (sja1105_can_use_vlan_as_tags(skb)) - return true; - if (sja1105_is_link_local(skb)) - return true; - if (sja1105_is_meta_frame(skb)) - return true; - return false; -} - /* Calls sja1105_port_deferred_xmit in sja1105_main.c */ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, struct sk_buff *skb) @@ -167,6 +133,31 @@ static u16 sja1105_xmit_tpid(struct sja1105_port *sp) return sp->xmit_tpid; } +static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + struct net_device *br = dp->bridge_dev; + u16 tx_vid; + + /* If the port is under a VLAN-aware bridge, just slide the + * VLAN-tagged packet into the FDB and hope for the best. + * This works because we support a single VLAN-aware bridge + * across the entire dst, and its VLANs cannot be shared with + * any standalone port. + */ + if (br_vlan_enabled(br)) + return skb; + + /* If the port is under a VLAN-unaware bridge, use an imprecise + * TX VLAN that targets the bridge's entire broadcast domain, + * instead of just the specific port. + */ + tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num); + + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), tx_vid); +} + static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -175,6 +166,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); + /* Transmitting management traffic does not rely upon switch tagging, * but instead SPI-installed management routes. Part 2 of this * is the .port_deferred_xmit driver callback. @@ -199,6 +193,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, __be16 *tx_header; int trailer_pos; + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); + /* Transmitting control packets is done using in-band control * extensions, while data packets are transmitted using * tag_8021q TX VLANs. 
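The imprecise TX path above reduces to choosing between two encodings of the VID bitfield documented at the top of tag_8021q.c: a precise TX VID naming (switch, port), or a VBID-based VID naming a whole bridge. A runnable model using the shift/mask values from that header comment (the DIR_TX value of 2 << 10 is an assumption, consistent with the DIR field description but not spelled out in this hunk):

#include <stdint.h>
#include <stdio.h>

#define DIR_SHIFT	10
#define DIR_TX		(2u << DIR_SHIFT)	/* assumed DSA_8021Q_DIR_TX encoding */
#define SWITCH_ID(x)	(((x) & 0x7) << 6)	/* VID[8:6] */
#define PORT(x)		((x) & 0xf)		/* VID[3:0] */
#define VBID(x)		((((x) & 0x3) << 4) | (((x) & 0x4) << 7)) /* VID[5:4], VID[9] */

/* Precise TX: steer to one port of one switch. */
static uint16_t tx_vid(int swid, int port)
{
	return DIR_TX | SWITCH_ID(swid) | PORT(port);
}

/* Imprecise TX: target a bridge's whole broadcast domain. VBID 0 is
 * reserved for precise TX, hence the +1.
 */
static uint16_t bridge_tx_fwd_offload_vid(int bridge_num)
{
	return DIR_TX | VBID(bridge_num + 1);
}

int main(void)
{
	printf("precise TX VID for sw 0 port 2: 0x%03x\n",
	       (unsigned int)tx_vid(0, 2));
	printf("imprecise TX VID for bridge 0:  0x%03x\n",
	       (unsigned int)bridge_tx_fwd_offload_vid(0));
	return 0;
}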
@@ -358,20 +355,6 @@ static struct sk_buff return skb; } -static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan) -{ - struct dsa_port *dp = dsa_slave_to_port(skb->dev); - struct sja1105_port *sp = dp->priv; - u16 vid = sp->subvlan_map[subvlan]; - u16 vlan_tci; - - if (vid == VLAN_N_VID) - return; - - vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid; - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); -} - static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb) { u16 tpid = ntohs(eth_hdr(skb)->h_proto); @@ -385,25 +368,45 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110; } +/* If the VLAN in the packet is a tag_8021q one, set @source_port and + * @switch_id and strip the header. Otherwise set @vid and keep it in the + * packet. + */ +static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, + int *switch_id, u16 *vid) +{ + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + u16 vlan_tci; + + if (skb_vlan_tag_present(skb)) + vlan_tci = skb_vlan_tag_get(skb); + else + vlan_tci = ntohs(hdr->h_vlan_TCI); + + if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK)) + return dsa_8021q_rcv(skb, source_port, switch_id); + + /* Try our best with imprecise RX */ + *vid = vlan_tci & VLAN_VID_MASK; +} + static struct sk_buff *sja1105_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { - int source_port, switch_id, subvlan = 0; + int source_port = -1, switch_id = -1; struct sja1105_meta meta = {0}; struct ethhdr *hdr; bool is_link_local; bool is_meta; + u16 vid; hdr = eth_hdr(skb); is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); - skb->offload_fwd_mark = 1; - if (sja1105_skb_has_tag_8021q(skb)) { /* Normal traffic path. */ - dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan); + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid); } else if (is_link_local) { /* Management traffic path. 
Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of @@ -422,14 +425,17 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (source_port == -1 || switch_id == -1) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; } - if (subvlan) - sja1105_decode_subvlan(skb, subvlan); + if (!is_link_local) + dsa_default_offload_fwd_mark(skb); return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, is_meta); @@ -474,7 +480,8 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, int *source_port, - int *switch_id) + int *switch_id, + bool *host_only) { u16 rx_header; @@ -488,6 +495,9 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, */ rx_header = ntohs(*(__be16 *)skb->data); + if (rx_header & SJA1110_RX_HEADER_HOST_ONLY) + *host_only = true; + if (rx_header & SJA1110_RX_HEADER_IS_METADATA) return sja1110_rcv_meta(skb, rx_header); @@ -535,34 +545,35 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, } static struct sk_buff *sja1110_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { - int source_port = -1, switch_id = -1, subvlan = 0; - - skb->offload_fwd_mark = 1; + int source_port = -1, switch_id = -1; + bool host_only = false; + u16 vid = 0; if (sja1110_skb_has_inband_control_extension(skb)) { skb = sja1110_rcv_inband_control_extension(skb, &source_port, - &switch_id); + &switch_id, + &host_only); if (!skb) return NULL; } /* Packets with in-band control extensions might still have RX VLANs */ if (likely(sja1105_skb_has_tag_8021q(skb))) - dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan); + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid); - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (source_port == -1 || switch_id == -1) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); if (!skb->dev) { - netdev_warn(netdev, - "Couldn't decode source port %d and switch id %d\n", - source_port, switch_id); + netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; } - if (subvlan) - sja1105_decode_subvlan(skb, subvlan); + if (!host_only) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -596,7 +607,6 @@ static const struct dsa_device_ops sja1105_netdev_ops = { .proto = DSA_TAG_PROTO_SJA1105, .xmit = sja1105_xmit, .rcv = sja1105_rcv, - .filter = sja1105_filter, .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, .promisc_on_master = true, @@ -610,7 +620,6 @@ static const struct dsa_device_ops sja1110_netdev_ops = { .proto = DSA_TAG_PROTO_SJA1110, .xmit = sja1110_xmit, .rcv = sja1110_rcv, - .filter = sja1105_filter, .flow_dissect = sja1110_flow_dissect, .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN, .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN, diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index ba73804340a5..5749ba85c2b8 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return skb; 
} -static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *trailer; int source_port; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index a31ff7fcb45f..ff442b8af636 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) { int source_port; u8 *trailer; @@ -46,7 +45,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; /* Frame is forwarded by hardware, don't forward in software. */ - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); return skb; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9cce612e8976..73fce9467467 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include <linux/uaccess.h> #include <net/pkt_sched.h> -__setup("ether=", netdev_boot_setup); - /** * eth_header - create the Ethernet header * @skb: buffer to alter @@ -182,12 +180,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. - * The DSA tagging protocol may be able to decode some but not all - * traffic (for example only for management). In that case give it the - * option to filter the packets from which it can decode source port - * information. */ - if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev)) + if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index baa5d10043cb..81fa36a4c9c4 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -7,6 +7,7 @@ * the information ethtool needs. */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/types.h> #include <linux/capability.h> @@ -23,6 +24,7 @@ #include <linux/rtnetlink.h> #include <linux/sched/signal.h> #include <linux/net.h> +#include <linux/pm_runtime.h> #include <net/devlink.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> @@ -807,6 +809,120 @@ out: return ret; } +static noinline_for_stack int +ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc, + const struct compat_ethtool_rxnfc __user *useraddr, + size_t size) +{ + struct compat_ethtool_rxnfc crxnfc = {}; + + /* We expect there to be holes between fs.m_ext and + * fs.ring_cookie and at the end of fs, but nowhere else. + * On non-x86, no conversion should be needed. 
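The BUILD_BUG_ON()s that follow pin down the only places where the 32-bit and 64-bit layouts of struct ethtool_rxnfc may diverge: a u64 (ring_cookie) that is 4-byte aligned on i386 but 8-byte aligned on x86-64 introduces holes. A small user-space demonstration on a reduced structure (illustrative stand-in, not the real ethtool layout):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* i386-flavored 64-bit integer: only 4-byte aligned, in the style of
 * the kernel's compat_u64 typedef.
 */
typedef uint64_t __attribute__((aligned(4))) compat_u64;

struct spec_native {
	uint32_t flow_type;
	uint64_t ring_cookie;	/* 8-aligned on x86-64: hole before it */
	uint32_t location;
};

struct spec_compat {
	uint32_t flow_type;
	compat_u64 ring_cookie;	/* 4-aligned: matches the i386 layout */
	uint32_t location;
};

int main(void)
{
	printf("native: cookie at %zu, sizeof %zu\n",
	       offsetof(struct spec_native, ring_cookie),
	       sizeof(struct spec_native));
	printf("compat: cookie at %zu, sizeof %zu\n",
	       offsetof(struct spec_compat, ring_cookie),
	       sizeof(struct spec_compat));
	return 0;
}

Compiled for x86-64, the native variant reports the cookie at offset 8 with padding holes, while the compat variant matches the 32-bit layout; that mismatch is exactly what ethtool_rxnfc_copy_from_compat() undoes field by field.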
+ */ + BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) && + sizeof(struct compat_ethtool_rxnfc) != + sizeof(struct ethtool_rxnfc)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(useraddr->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc)))) + return -EFAULT; + + *rxnfc = (struct ethtool_rxnfc) { + .cmd = crxnfc.cmd, + .flow_type = crxnfc.flow_type, + .data = crxnfc.data, + .fs = { + .flow_type = crxnfc.fs.flow_type, + .h_u = crxnfc.fs.h_u, + .h_ext = crxnfc.fs.h_ext, + .m_u = crxnfc.fs.m_u, + .m_ext = crxnfc.fs.m_ext, + .ring_cookie = crxnfc.fs.ring_cookie, + .location = crxnfc.fs.location, + }, + .rule_cnt = crxnfc.rule_cnt, + }; + + return 0; +} + +static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc, + const void __user *useraddr, + size_t size) +{ + if (compat_need_64bit_alignment_fixup()) + return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size); + + if (copy_from_user(rxnfc, useraddr, size)) + return -EFAULT; + + return 0; +} + +static int ethtool_rxnfc_copy_to_compat(void __user *useraddr, + const struct ethtool_rxnfc *rxnfc, + size_t size, const u32 *rule_buf) +{ + struct compat_ethtool_rxnfc crxnfc; + + memset(&crxnfc, 0, sizeof(crxnfc)); + crxnfc = (struct compat_ethtool_rxnfc) { + .cmd = rxnfc->cmd, + .flow_type = rxnfc->flow_type, + .data = rxnfc->data, + .fs = { + .flow_type = rxnfc->fs.flow_type, + .h_u = rxnfc->fs.h_u, + .h_ext = rxnfc->fs.h_ext, + .m_u = rxnfc->fs.m_u, + .m_ext = rxnfc->fs.m_ext, + .ring_cookie = rxnfc->fs.ring_cookie, + .location = rxnfc->fs.location, + }, + .rule_cnt = rxnfc->rule_cnt, + }; + + if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc)))) + return -EFAULT; + + return 0; +} + +static int ethtool_rxnfc_copy_to_user(void __user *useraddr, + const struct ethtool_rxnfc *rxnfc, + size_t size, const u32 *rule_buf) +{ + int ret; + + if (compat_need_64bit_alignment_fixup()) { + ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size, + rule_buf); + useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs); + } else { + ret = copy_to_user(useraddr, rxnfc, size); + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + } + + if (ret) + return -EFAULT; + + if (rule_buf) { + if (copy_to_user(useraddr, rule_buf, + rxnfc->rule_cnt * sizeof(u32))) + return -EFAULT; + } + + return 0; +} + static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { @@ -825,7 +941,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, info_size = (offsetof(struct ethtool_rxnfc, data) + sizeof(info.data)); - if (copy_from_user(&info, useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size)) return -EFAULT; rc = dev->ethtool_ops->set_rxnfc(dev, &info); @@ -833,7 +949,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; if (cmd == ETHTOOL_SRXCLSRLINS && - copy_to_user(useraddr, &info, info_size)) + ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL)) return -EFAULT; return 0; @@ -859,7 +975,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, info_size = (offsetof(struct ethtool_rxnfc, data) + sizeof(info.data)); - if (copy_from_user(&info, 
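The BUILD_BUG_ON() assertions above encode why this conversion exists: an i386 userspace aligns 64-bit members to 4 bytes, so struct ethtool_rxnfc (whose fs.ring_cookie is a u64) has its holes in different places than under the native x86-64 ABI. A reduced, userspace-runnable illustration of the effect — the struct names here are invented for the demo and only stand in for the native/compat pair:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct native_layout {			/* x86-64: u64 is 8-byte aligned */
	uint32_t cmd;
	uint64_t ring_cookie;
};

struct compat_layout {			/* i386: u64 is 4-byte aligned */
	uint32_t cmd;
	uint64_t ring_cookie;
} __attribute__((packed, aligned(4)));

int main(void)
{
	/* prints 8 vs 4: the same field sits at different offsets */
	printf("native ring_cookie offset: %zu\n",
	       offsetof(struct native_layout, ring_cookie));
	printf("compat ring_cookie offset: %zu\n",
	       offsetof(struct compat_layout, ring_cookie));
	return 0;
}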
useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size)) return -EFAULT; /* If FLOW_RSS was requested then user-space must be using the @@ -867,7 +983,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, */ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { info_size = sizeof(info); - if (copy_from_user(&info, useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size)) return -EFAULT; /* Since malicious users may modify the original data, * we need to check whether FLOW_RSS is still requested. @@ -893,18 +1009,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (ret < 0) goto err_out; - ret = -EFAULT; - if (copy_to_user(useraddr, &info, info_size)) - goto err_out; - - if (rule_buf) { - useraddr += offsetof(struct ethtool_rxnfc, rule_locs); - if (copy_to_user(useraddr, rule_buf, - info.rule_cnt * sizeof(u32))) - goto err_out; - } - ret = 0; - + ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf); err_out: kfree(rule_buf); @@ -2581,15 +2686,14 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) /* The main entry point in this file. Called from net/core/dev_ioctl.c */ -int dev_ethtool(struct net *net, struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) { struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - void __user *useraddr = ifr->ifr_data; u32 ethcmd, sub_cmd; int rc; netdev_features_t old_features; - if (!dev || !netif_device_present(dev)) + if (!dev) return -ENODEV; if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) return -EFAULT; @@ -2645,10 +2749,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return -EPERM; } + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + rc = -ENODEV; + goto out; + } + if (dev->ethtool_ops->begin) { rc = dev->ethtool_ops->begin(dev); - if (rc < 0) - return rc; + if (rc < 0) + goto out; } old_features = dev->features; @@ -2867,6 +2979,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (old_features != dev->features) netdev_features_change(dev); +out: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); return rc; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 73e0f5b626bf..f8bca08e727e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -2,6 +2,7 @@ #include <net/sock.h> #include <linux/ethtool_netlink.h> +#include <linux/pm_runtime.h> #include "netlink.h" static struct genl_family ethtool_genl_family; @@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +int ethnl_ops_begin(struct net_device *dev) +{ + int ret; + + if (!dev) + return 0; + + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + ret = -ENODEV; + goto err; + } + + if (dev->ethtool_ops->begin) { + ret = dev->ethtool_ops->begin(dev); + if (ret) + goto err; + } + + return 0; +err: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); + + return ret; +} + +void ethnl_ops_complete(struct net_device *dev) +{ + if (dev && dev->ethtool_ops->complete) + dev->ethtool_ops->complete(dev); + + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); +} + /** * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into @@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev && !netif_device_present(dev)) { -
dev_put(dev); - NL_SET_ERR_MSG(extack, "device not present"); - return -ENODEV; - } - req_info->dev = dev; req_info->flags = flags; return 0; @@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -378,8 +410,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return ret; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3fc395c86702..077aac3929a8 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -247,19 +247,8 @@ struct ethnl_reply_data { struct net_device *dev; }; -static inline int ethnl_ops_begin(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->begin) - return dev->ethtool_ops->begin(dev); - else - return 0; -} - -static inline void ethnl_ops_complete(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->complete) - dev->ethtool_ops->complete(dev); -} +int ethnl_ops_begin(struct net_device *dev); +void ethnl_ops_complete(struct net_device *dev); /** * struct ethnl_request_ops - unified handling of GET requests diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 88215b5c93aa..dd5a45f8a78a 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -340,8 +340,7 @@ nla_put_failure: out_dev: wpan_phy_put(phy); out: - if (dev) - dev_put(dev); + dev_put(dev); return rc; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 0cf2374c143b..277124f206e0 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) { struct wpan_dev *wpan_dev = info->user_ptr[1]; - if (wpan_dev->netdev) - dev_put(wpan_dev->netdev); + dev_put(wpan_dev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a45a0401adc5..90233efa1f6b 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); rcu_read_lock(); dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: @@ -129,7 +128,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, int ret = -ENOIOCTLCMD; struct net_device *dev; - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, arg)) return -EFAULT; ifr.ifr_name[IFNAMSIZ-1] = 0; @@ -143,7 +142,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + if (!ret && put_user_ifreq(&ifr, arg)) ret = -EFAULT; dev_put(dev); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 54648181dd56..0e4d758c2585 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case 
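A recurring cleanup through this section (ethnl_default_doit above, the ieee802154 hunks, and later fib_nh_common_release(), icmp_build_probe(), ipv4_blackhole_route() and the ipv6 addrconf paths): dev_hold() and dev_put() now accept a NULL pointer, so every open-coded guard collapses. In miniature:

	/* before: each caller guarded the refcount operation itself */
	if (dev)
		dev_put(dev);

	/* after: the NULL check lives inside dev_put()/dev_hold() */
	dev_put(dev);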
SIOCGIFPFLAGS: - if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); - if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq))) + if (!err && put_user_ifreq(&ifr, p)) err = -EFAULT; break; @@ -966,7 +966,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: case SIOCSIFFLAGS: - if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 73721a4448bd..f4468980b675 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev) static struct in_ifaddr *inet_alloc_ifa(void) { - return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); + return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT); } static void inet_rcu_free_ifa(struct rcu_head *head) @@ -1243,7 +1243,7 @@ out: return ret; } -static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) +int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); const struct in_ifaddr *ifa; @@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { }; static int inet_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; @@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev, return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, - inet_af_policy, NULL); + inet_af_policy, extack); if (err < 0) return err; @@ -2424,11 +2425,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + struct net *net = ctl->extra2; + int ret; - if (write && *valp != val) { - struct net *net = ctl->extra2; + if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { if (!rtnl_trylock()) { /* Restore the original values before restarting */ @@ -2762,8 +2767,6 @@ void __init devinet_init(void) INIT_HLIST_HEAD(&inet_addr_lst[i]); register_pernet_subsys(&devinet_ops); - - register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4c0c33e4710d..b42c429cebbe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - if (nhc->nhc_dev) - dev_put(nhc->nhc_dev); - + dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -260,7 +258,7 @@ EXPORT_SYMBOL_GPL(free_fib_info); void fib_release_info(struct fib_info *fi) { spin_lock_bh(&fib_info_lock); - if (fi && --fi->fib_treeref == 0) { + if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) 
hlist_del(&fi->fib_lhash); @@ -1373,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, if (!cfg->fc_mx) { fi = fib_find_info_nh(net, cfg); if (fi) { - fi->fib_treeref++; + refcount_inc(&fi->fib_treeref); return fi; } } @@ -1547,11 +1545,11 @@ link_it: if (ofi) { fi->fib_dead = 1; free_fib_info(fi); - ofi->fib_treeref++; + refcount_inc(&ofi->fib_treeref); return ofi; } - fi->fib_treeref++; + refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 25cf387cca5b..8060524f4256 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2380,11 +2380,11 @@ void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), - 0, SLAB_PANIC, NULL); + 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", LEAF_SIZE, - 0, SLAB_PANIC, NULL); + 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); } struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c695d294a5df..8b30cadff708 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) sizeof(struct in6_addr)) goto send_mal_query; dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); - if (dev) - dev_hold(dev); + dev_hold(dev); break; #endif default: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..7e5072722f05 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2233,7 +2233,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, iml->sfmode, psf->sl_count, psf->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); return err; } @@ -2382,7 +2382,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -2393,7 +2394,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); @@ -2468,19 +2470,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } if (msf->imsf_numsrc) { - newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + msf->imsf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc; - memcpy(newpsl->sl_addr, msf->imsf_slist, - msf->imsf_numsrc * sizeof(msf->imsf_slist[0])); + memcpy(newpsl->sl_addr, msf->imsf_slist_flex, + flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc)); err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, + struct_size(newpsl, sl_addr, 
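fib_treeref above is converted from a bare int to a refcount_t, which saturates and warns on overflow or underflow instead of silently wrapping. The three points of the conversion, sketched against the fib_info lifetime (the free step is shortened to a comment):

	refcount_set(&fi->fib_treeref, 1);	/* first tree reference, at link time */
	refcount_inc(&fi->fib_treeref);		/* an existing fib_info is reused */
	if (refcount_dec_and_test(&fi->fib_treeref)) {
		/* last reference dropped: unhash and free,
		 * as fib_release_info() does above */
	}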
+ newpsl->sl_max)); goto done; } } else { @@ -2493,7 +2498,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, @@ -2551,14 +2557,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, count = psl->sl_count; } copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc; - len = copycount * sizeof(psl->sl_addr[0]); + len = flex_array_size(psl, sl_addr, copycount); msf->imsf_numsrc = count; if (put_user(IP_MSFILTER_SIZE(copycount), optlen) || copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) { return -EFAULT; } if (len && - copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len)) + copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len)) return -EFAULT; return 0; done: @@ -2713,6 +2719,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { + spin_lock_bh(&im->lock); for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; @@ -2723,6 +2730,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u im->sfcount[MCAST_EXCLUDE]; else rv = im->sfcount[MCAST_EXCLUDE] != 0; + spin_unlock_bh(&im->lock); } else rv = 1; /* unspecified source; tentatively allow */ } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 12dca0c85f3c..6ebf05859acb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -923,7 +923,7 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d8a8da3ae7e..6b04a88466b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); - /* Be paranoid, rather than too clever. 
*/ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (lwtunnel_xmit_redirect(dst->lwtstate)) { @@ -446,8 +437,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) { BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); - memcpy(&iph->saddr, &fl4->saddr, - sizeof(fl4->saddr) + sizeof(fl4->daddr)); + + iph->saddr = fl4->saddr; + iph->daddr = fl4->daddr; } /* Note: skb->sk can be different from sk, in case of tunnels */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ec6036713e2c..b297bb28556e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, struct sockaddr_storage *group, struct sockaddr_storage *list) { - int msize = IP_MSFILTER_SIZE(numsrc); struct ip_msfilter *msf; struct sockaddr_in *psin; int err, i; - msf = kmalloc(msize, GFP_KERNEL); + msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); if (!msf) return -ENOBUFS; @@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, if (psin->sin_family != AF_INET) goto Eaddrnotavail; - msf->imsf_slist[i] = psin->sin_addr.s_addr; + msf->imsf_slist_flex[i] = psin->sin_addr.s_addr; } err = ip_mc_msfilter(sk, msf, ifindex); kfree(msf); @@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) goto out_free_gsf; err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, - gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist); + gsf->gf_fmode, &gsf->gf_group, + gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return err; @@ -800,7 +800,7 @@ out_free_gsf: static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; unsigned int n; void *p; @@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ err = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) @@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_gsf; err = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_gsf; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); + &gf32->gf_group, gf32->gf_slist_flex); out_free_gsf: kfree(p); return err; @@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname) static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, 
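The igmp and ip_sockglue hunks replace IP_MSFILTER_SIZE()/IP_SFLSIZE() arithmetic on zero-length arrays with real flexible-array members (imsf_slist_flex, gf_slist_flex) sized through the overflow-checked helpers. The idiom, with n standing for the source count and psl/newpsl typed as in the code above:

	/* header plus n sl_addr[] elements, overflow-checked */
	newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, n), GFP_KERNEL);

	/* just the n elements, e.g. as a copy length */
	len = flex_array_size(psl, sl_addr, n);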
gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return -EFAULT; num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex); if (err) return err; if (gsf.gf_numsrc < num) @@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; - err = ip_mc_gsfget(sk, &gf, p->gf_slist); + err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex); if (err) return err; if (gf.gf_numsrc < num) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index be75b409445c..fe9101d3d69e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -958,19 +958,20 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ctl); -int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) return -EFAULT; err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (!err && copy_to_user(data, &p, sizeof(p))) return -EFAULT; return err; } -EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); +EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index eb560eecee08..efe25a0172e6 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -405,7 +405,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 266c65577ba6..3aa78ccbec3e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -347,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99c06944501a..b181773d7ad3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) struct rt_cache_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); + seq_puts(seq, 
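ip_gre, ip_vti and ipip all move their private ioctls from .ndo_do_ioctl to .ndo_siocdevprivate, matching the ip_tunnel_siocdevprivate() wrapper above: the handler now receives the userspace pointer as an explicit argument rather than digging it out of ifr->ifr_ifru.ifru_data. A driver-side sketch of the new callback shape (the foo_ name is a placeholder):

static int foo_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			      void __user *data, int cmd)
{
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	/* ... handle the SIOCDEVPRIVATE-range command ... */
	if (copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}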
"entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); return 0; } - seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " - " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", dst_entries_get_slow(&ipv4_dst_ops), 0, /* st->in_hit */ st->in_slow_tot, @@ -1299,26 +1300,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { - const struct rtable *rt = (const struct rtable *)dst; - unsigned int mtu = rt->rt_pmtu; - - if (!mtu || time_after_eq(jiffies, rt->dst.expires)) - mtu = dst_metric_raw(dst, RTAX_MTU); - - if (mtu) - goto out; - - mtu = READ_ONCE(dst->dev->mtu); - - if (unlikely(ip_mtu_locked(dst))) { - if (rt->rt_uses_gateway && mtu > 576) - mtu = 576; - } - -out: - mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ipv4_mtu); @@ -2831,8 +2813,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - if (new->dev) - dev_hold(new->dev); + dev_hold(new->dev); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8cb44040ec68..f931def6302e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4512,7 +4512,9 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT, + NULL); /* Size and allocate the main established and bind bucket * hash tables. 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 25fa4c01a17f..62ba8d0f2c60 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - - ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, ctx->num = 1; } - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = rcu_dereference_protected(q->ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(q->ctx, ctx); + octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); } else { - octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); } - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 149ceb5c94ff..3f7bd7ae7d7a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ +#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -454,11 +455,12 @@ static void tcp_sndbuf_expand(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, + unsigned int skbtruesize) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { @@ -471,7 +473,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) +/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing + * can play nice with us, as sk_buff and skb->head might be either + * freed or shared with up to MAX_SKB_FRAGS segments. + * Only give a boost to drivers using page frag(s) to hold the frame(s), + * and if no payload was pulled in skb->head before reaching us. 
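The tcp_fastopen conversion above drops the spinlock plus rcu_dereference_protected()/rcu_assign_pointer() sequence in favour of a single atomic xchg() of the RCU-managed pointer; the old context still goes through a grace period before being freed. Distilled from the hunk (the __force cast strips the __rcu annotation for the raw swap):

	octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx,
		    ctx);
	if (octx)
		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);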
+ */ +static u32 truesize_adjust(bool adjust, const struct sk_buff *skb) +{ + u32 truesize = skb->truesize; + + if (adjust && !skb_headlen(skb)) { + truesize -= SKB_TRUESIZE(skb_end_offset(skb)); + /* paranoid check, some drivers might be buggy */ + if (unlikely((int)truesize < (int)skb->len)) + truesize = skb->truesize; + } + return truesize; +} + +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, + bool adjust) { struct tcp_sock *tp = tcp_sk(sk); int room; @@ -480,15 +502,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (room > 0 && !tcp_under_memory_pressure(sk)) { + unsigned int truesize = truesize_adjust(adjust, skb); int incr; /* Check #2. Increase window, if skb with such overhead * will fit to rcvbuf in future. */ - if (tcp_win_from_space(sk, skb->truesize) <= skb->len) + if (tcp_win_from_space(sk, truesize) <= skb->len) incr = 2 * tp->advmss; else - incr = __tcp_grow_window(sk, skb); + incr = __tcp_grow_window(sk, skb, truesize); if (incr) { incr = max_t(int, incr, 2 * skb->len); @@ -782,7 +805,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_ecn_check_ce(sk, skb); if (skb->len >= 128) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, true); } /* Called to compute a smoothed rtt estimate. The data fed to this @@ -969,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, return 0; if (seq_len > tp->mss_cache) dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) + state->flag |= FLAG_DSACK_TLP; tp->dsack_dups += dup_segs; /* Skip the DSACK if dup segs weren't retransmitted by sender */ @@ -976,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; - tp->rack.dsack_seen = 1; + /* We increase the RACK ordering window in rounds where we receive + * DSACKs that may have been due to reordering causing RACK to trigger + * a spurious fast recovery. Thus RACK ignores DSACKs that happen + * without having seen reordering, or that match TLP probes (TLP + * is timer-driven, not triggered by RACK). + */ + if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP)) + tp->rack.dsack_seen = 1; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ @@ -3628,7 +3660,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (!tp->tlp_retrans) { /* TLP of new data has been acknowledged */ tp->tlp_high_seq = 0; - } else if (flag & FLAG_DSACKING_ACK) { + } else if (flag & FLAG_DSACK_TLP) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { @@ -4769,7 +4801,7 @@ coalesce_done: * and trigger fast retransmit. */ if (tcp_is_sack(tp)) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, true); kfree_skb_partial(skb, fragstolen); skb = NULL; goto add_sack; @@ -4857,7 +4889,7 @@ end: * and trigger fast retransmit. 
*/ if (tcp_is_sack(tp)) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, false); skb_condense(skb); skb_set_owner_r(skb, sk); } @@ -5383,7 +5415,7 @@ static void tcp_new_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } - sk->sk_write_space(sk); + INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } static void tcp_check_space(struct sock *sk) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a692626c19e4..2e62e0d6373a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -/* - * Get next listener socket follow cur. If cur is NULL, get first socket - * starting from bucket given in st->bucket; when st->bucket is zero the - * very first socket in the hash table is returned. +static unsigned short seq_file_family(const struct seq_file *seq); + +static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) +{ + unsigned short family = seq_file_family(seq); + + /* AF_UNSPEC is used as a match all */ + return ((family == AF_UNSPEC || family == sk->sk_family) && + net_eq(sock_net(sk), seq_file_net(seq))); +} + +/* Find a non empty bucket (starting from st->bucket) + * and return the first sk from it. */ -static void *listening_get_next(struct seq_file *seq, void *cur) +static void *listening_get_first(struct seq_file *seq) { - struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; - struct sock *sk = cur; - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; + struct sock *sk; - if (!sk) { -get_head: - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock(&ilb->lock); - sk = sk_nulls_head(&ilb->nulls_head); - st->offset = 0; - goto get_sk; + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + if (hlist_empty(&ilb2->head)) + continue; + + spin_lock(&ilb2->lock); + inet_lhash2_for_each_icsk(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) + return sk; + } + spin_unlock(&ilb2->lock); } - ilb = &tcp_hashinfo.listening_hash[st->bucket]; + + return NULL; +} + +/* Find the next sk of "cur" within the same bucket (i.e. st->bucket). + * If "cur" is the last one in the st->bucket, + * call listening_get_first() to return the first sk of the next + * non empty bucket. 
+ */ +static void *listening_get_next(struct seq_file *seq, void *cur) +{ + struct tcp_iter_state *st = seq->private; + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; + struct sock *sk = cur; + ++st->num; ++st->offset; - sk = sk_nulls_next(sk); -get_sk: - sk_nulls_for_each_from(sk, node) { - if (!net_eq(sock_net(sk), net)) - continue; - if (afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) + icsk = inet_csk(sk); + inet_lhash2_for_each_icsk_continue(icsk) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) return sk; } - spin_unlock(&ilb->lock); - st->offset = 0; - if (++st->bucket < INET_LHTABLE_SIZE) - goto get_head; - return NULL; + + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + spin_unlock(&ilb2->lock); + ++st->bucket; + return listening_get_first(seq); } static void *listening_get_idx(struct seq_file *seq, loff_t *pos) @@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) st->bucket = 0; st->offset = 0; - rc = listening_get_next(seq, NULL); + rc = listening_get_first(seq); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) */ static void *established_get_first(struct seq_file *seq) { - struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - void *rc = NULL; - - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); st->offset = 0; for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { @@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq) spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if ((afinfo->family != AF_UNSPEC && - sk->sk_family != afinfo->family) || - !net_eq(sock_net(sk), net)) { - continue; - } - rc = sk; - goto out; + if (seq_sk_match(seq, sk)) + return sk; } spin_unlock_bh(lock); } -out: - return rc; + + return NULL; } static void *established_get_next(struct seq_file *seq, void *cur) { - struct tcp_seq_afinfo *afinfo; struct sock *sk = cur; struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); ++st->num; ++st->offset; @@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) sk = sk_nulls_next(sk); sk_nulls_for_each_from(sk, node) { - if ((afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) && - net_eq(sock_net(sk), net)) + if (seq_sk_match(seq, sk)) return sk; } @@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seek_last_pos(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; + int bucket = st->bucket; int offset = st->offset; int orig_num = st->num; void *rc = NULL; switch (st->state) { case TCP_SEQ_STATE_LISTENING: - if (st->bucket >= INET_LHTABLE_SIZE) + if (st->bucket > tcp_hashinfo.lhash2_mask) break; st->state = TCP_SEQ_STATE_LISTENING; - rc = listening_get_next(seq, NULL); - while (offset-- && rc) + rc = listening_get_first(seq); + while (offset-- && rc && bucket == st->bucket) rc = listening_get_next(seq, rc); if (rc) break; @@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) if (st->bucket > tcp_hashinfo.ehash_mask) break; rc = established_get_first(seq); - while (offset-- && rc) + while (offset-- && rc && bucket == 
st->bucket) rc = established_get_next(seq, rc); } @@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) @@ -2687,6 +2687,15 @@ out: } #ifdef CONFIG_BPF_SYSCALL +struct bpf_tcp_iter_state { + struct tcp_iter_state state; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + struct sock **batch; + bool st_bucket_done; +}; + struct bpf_iter__tcp { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct sock_common *, sk_common); @@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, return bpf_iter_run_prog(prog, &ctx); } +static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); +} + +static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, + unsigned int new_batch_sz) +{ + struct sock **new_batch; + + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, + GFP_USER | __GFP_NOWARN); + if (!new_batch) + return -ENOMEM; + + bpf_iter_tcp_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, + struct sock *start_sk) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct inet_connection_sock *icsk; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + icsk = inet_csk(start_sk); + inet_lhash2_for_each_icsk_continue(icsk) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) { + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + expected++; + } + } + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); + + return expected; +} + +static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, + struct sock *start_sk) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct hlist_nulls_node *node; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + sk = sk_nulls_next(start_sk); + sk_nulls_for_each_from(sk, node) { + if (seq_sk_match(seq, sk)) { + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + expected++; + } + } + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + + return expected; +} + +static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + unsigned int expected; + bool resized = false; + struct sock *sk; + + /* The st->bucket is done. Directly advance to the next + * bucket instead of having the tcp_seek_last_pos() to skip + * one by one in the current bucket and eventually find out + * it has to advance to the next bucket. 
+ */ + if (iter->st_bucket_done) { + st->offset = 0; + st->bucket++; + if (st->state == TCP_SEQ_STATE_LISTENING && + st->bucket > tcp_hashinfo.lhash2_mask) { + st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + } + } + +again: + /* Get a new batch */ + iter->cur_sk = 0; + iter->end_sk = 0; + iter->st_bucket_done = false; + + sk = tcp_seek_last_pos(seq); + if (!sk) + return NULL; /* Done */ + + if (st->state == TCP_SEQ_STATE_LISTENING) + expected = bpf_iter_tcp_listening_batch(seq, sk); + else + expected = bpf_iter_tcp_established_batch(seq, sk); + + if (iter->end_sk == expected) { + iter->st_bucket_done = true; + return sk; + } + + if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) { + resized = true; + goto again; + } + + return sk; +} + +static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos) +{ + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + if (*pos) + return bpf_iter_tcp_batch(seq); + + return SEQ_START_TOKEN; +} + +static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so advance to the next sk in + * the batch. + */ + if (iter->cur_sk < iter->end_sk) { + /* Keeping st->num consistent in tcp_iter_state. + * bpf_iter_tcp does not use st->num. + * meta.seq_num is used instead. + */ + st->num++; + /* Move st->offset to the next sk in the bucket such that + * the future start() will resume at st->offset in + * st->bucket. See tcp_seek_last_pos(). + */ + st->offset++; + sock_put(iter->batch[iter->cur_sk++]); + } + + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + sk = bpf_iter_tcp_batch(seq); + + ++*pos; + /* Keeping st->last_pos consistent in tcp_iter_state. + * bpf iter does not do lseek, so st->last_pos always equals to *pos. 
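bpf_iter_tcp_batch() above snapshots a whole hash bucket into a private array, taking sock_hold() on each entry before the bucket lock is dropped, so the bpf program run from ->show() can later call sleeping helpers (setsockopt/getsockopt) without any spinlock held; if the array was too small, it is resized to 3/2 of the observed bucket population and the bucket walked once more. The grab-under-lock, process-unlocked core, reduced to a sketch (each_sock_in_bucket is a placeholder for the listening/established hash walks shown above):

	spin_lock(&bucket->lock);
	n = 0;
	each_sock_in_bucket(sk) {		/* placeholder for the hash walk */
		if (n < iter->max_sk) {
			sock_hold(sk);
			iter->batch[n++] = sk;
		}
	}
	spin_unlock(&bucket->lock);
	/* iterate iter->batch[0..n) with no bucket lock held */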
+ */ + st->last_pos = *pos; + return sk; +} + static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; + bool slow; uid_t uid; + int ret; if (v == SEQ_START_TOKEN) return 0; + if (sk_fullsock(sk)) + slow = lock_sock_fast(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + if (sk->sk_state == TCP_TIME_WAIT) { uid = 0; } else if (sk->sk_state == TCP_NEW_SYN_RECV) { @@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return tcp_prog_seq_show(prog, &meta, v, uid); + ret = tcp_prog_seq_show(prog, &meta, v, uid); + +unlock: + if (sk_fullsock(sk)) + unlock_sock_fast(sk, slow); + return ret; + } static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) { + struct bpf_tcp_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -2743,16 +2947,33 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) (void)tcp_prog_seq_show(prog, &meta, v, 0); } - tcp_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) { + bpf_iter_tcp_put_batch(iter); + iter->st_bucket_done = false; + } } static const struct seq_operations bpf_iter_tcp_seq_ops = { .show = bpf_iter_tcp_seq_show, - .start = tcp_seq_start, - .next = tcp_seq_next, + .start = bpf_iter_tcp_seq_start, + .next = bpf_iter_tcp_seq_next, .stop = bpf_iter_tcp_seq_stop, }; #endif +static unsigned short seq_file_family(const struct seq_file *seq) +{ + const struct tcp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + /* Iterated from bpf_iter. Let the bpf prog to filter instead. */ + if (seq->op == &bpf_iter_tcp_seq_ops) + return AF_UNSPEC; +#endif + + /* Iterated from proc fs */ + afinfo = PDE_DATA(file_inode(seq->file)); + return afinfo->family; +} static const struct seq_operations tcp4_seq_ops = { .show = tcp4_seq_show, @@ -2964,7 +3185,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; - spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); @@ -3003,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) +#define INIT_BATCH_SZ 16 + static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { - struct tcp_iter_state *st = priv_data; - struct tcp_seq_afinfo *afinfo; - int ret; + struct bpf_tcp_iter_state *iter = priv_data; + int err; - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); - if (!afinfo) - return -ENOMEM; + err = bpf_iter_init_seq_net(priv_data, aux); + if (err) + return err; - afinfo->family = AF_UNSPEC; - st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data, aux); - if (ret) - kfree(afinfo); - return ret; + err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ); + if (err) { + bpf_iter_fini_seq_net(priv_data); + return err; + } + + return 0; } static void bpf_iter_fini_tcp(void *priv_data) { - struct tcp_iter_state *st = priv_data; + struct bpf_tcp_iter_state *iter = priv_data; - kfree(st->bpf_seq_afinfo); bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); } static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = 
bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, - .seq_priv_size = sizeof(struct tcp_iter_state), + .seq_priv_size = sizeof(struct bpf_tcp_iter_state), }; +static const struct bpf_func_proto * +bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_sk_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_sk_getsockopt_proto; + default: + return NULL; + } +} + static struct bpf_iter_reg tcp_reg_info = { .target = "tcp", .ctx_arg_info_size = 1, @@ -3043,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .get_func_proto = bpf_iter_tcp_get_func_proto, .seq_info = &tcp_seq_info, }; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 6f1b4ac7fe99..fd113f6226ef 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -172,7 +172,8 @@ void tcp_rack_reo_timeout(struct sock *sk) /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. * - * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * If a DSACK is received that seems like it may have been due to reordering + * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded * by srtt), since there is possibility that spurious retransmission was * due to reordering delay longer than reo_wnd. * diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 9f5a5cdc38e6..7a1d5f473878 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS]; static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) { *prot = *base; - prot->unhash = sock_map_unhash; prot->close = sock_map_close; prot->recvmsg = udp_bpf_recvmsg; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 747f56e0c636..e504204bca92 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL If unsure, say N. +config IPV6_IOAM6_LWTUNNEL + bool "IPv6: IOAM Pre-allocated Trace insertion support" + depends on IPV6 + select LWTUNNEL + help + Support for the inline insertion of IOAM Pre-allocated + Trace Header (only on locally generated packets), using + the lightweight tunnels mechanism. + + If unsure, say N. 
+ endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cf7b47bdb9b3..1bc7e143217b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o rpl.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o +ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3bf685fe64b9..8381288a0d6e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -89,6 +89,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> +#include <linux/ioam6.h> #define INFINITY_LIFE_TIME 0xFFFFFFFF @@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -694,8 +701,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, errout: if (in6_dev) in6_dev_put(in6_dev); - if (dev) - dev_put(dev); + dev_put(dev); return err; } @@ -1080,7 +1086,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - ifa = kzalloc(sizeof(*ifa), gfp_flags); + ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT); if (!ifa) { err = -ENOBUFS; goto out; @@ -5211,8 +5217,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .netnsid = -1, .type = type, }; - struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; + struct net *tgt_net = sock_net(skb->sk); int idx, s_idx, s_ip_idx; int h, s_h; struct net_device *dev; @@ -5351,7 +5356,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct net *net = sock_net(in_skb->sk); + struct net *tgt_net = sock_net(in_skb->sk); struct inet6_fill_args fillargs = { .portid = NETLINK_CB(in_skb).portid, .seq = nlh->nlmsg_seq, @@ -5359,7 +5364,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .flags = 0, .netnsid = -1, }; - struct net *tgt_net = net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL, *peer; @@ -5412,8 +5416,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: - if (dev) - dev_put(dev); + dev_put(dev); if (fillargs.netnsid >= 0) put_net(tgt_net); @@ -5526,6 +5529,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_POLICY] = 
cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; + array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; + array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; + array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; } static inline size_t inet6_ifla6_size(void) @@ -5784,7 +5790,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, } static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; @@ -5797,7 +5804,7 @@ static int inet6_validate_link_af(const struct net_device *dev, } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); + inet6_af_policy, extack); if (err) return err; @@ -6540,6 +6547,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, static int minus_one = -1; static const int two_five_five = 255; +static u32 ioam6_if_id_max = U16_MAX; static const struct ctl_table addrconf_sysctl[] = { { @@ -6933,6 +6941,31 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ioam6_enabled", + .data = &ipv6_devconf.ioam6_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { + .procname = "ioam6_id", + .data = &ipv6_devconf.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)&ioam6_if_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &ipv6_devconf.ioam6_id_wide, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2389ff702f51..d92c90d97763 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> +#include <net/ioam6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); + net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; + net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; + err = ipv6_init_mibs(net); if (err) return err; @@ -1191,6 +1195,10 @@ static int __init inet6_init(void) if (err) goto rpl_fail; + err = ioam6_init(); + if (err) + goto ioam6_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1213,6 +1221,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + ioam6_exit(); +ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26882e165c9e..3a871a09f962 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,22 +49,12 @@ #include <net/seg6_hmac.h> #endif #include <net/rpl.h> +#include <linux/ioam6.h> +#include <net/ioam6.h> +#include <net/dst_metadata.h> #include <linux/uaccess.h> -/* - * Parsing tlv encoded headers. - * - * Parsing function "func" returns true, if parsing succeed - * and false, if it failed. - * It MUST NOT touch skb->h. 
- */ - -struct tlvtype_proc { - int type; - bool (*func)(struct sk_buff *skb, int offset); -}; - /********************* Generic functions *********************/ @@ -109,16 +99,23 @@ drop: return false; } +static bool ipv6_hop_ra(struct sk_buff *skb, int optoff); +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff); +static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff); +static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff); +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static bool ipv6_dest_hao(struct sk_buff *skb, int optoff); +#endif + /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, +static bool ip6_parse_tlv(bool hopbyhop, struct sk_buff *skb, int max_count) { int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - const struct tlvtype_proc *curr; bool disallow_unknowns = false; int tlv_count = 0; int padlen = 0; @@ -173,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (tlv_count > max_count) goto bad; - for (curr = procs; curr->type >= 0; curr++) { - if (curr->type == nh[off]) { - /* type specific length/alignment - checks will be performed in the - func(). */ - if (curr->func(skb, off) == false) + if (hopbyhop) { + switch (nh[off]) { + case IPV6_TLV_ROUTERALERT: + if (!ipv6_hop_ra(skb, off)) + return false; + break; + case IPV6_TLV_IOAM: + if (!ipv6_hop_ioam(skb, off)) + return false; + break; + case IPV6_TLV_JUMBO: + if (!ipv6_hop_jumbo(skb, off)) + return false; + break; + case IPV6_TLV_CALIPSO: + if (!ipv6_hop_calipso(skb, off)) + return false; + break; + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) + return false; + break; + } + } else { + switch (nh[off]) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPV6_TLV_HAO: + if (!ipv6_dest_hao(skb, off)) + return false; + break; +#endif + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) return false; break; } } - if (curr->type < 0 && - !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) - return false; - padlen = 0; } off += optlen; @@ -264,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) } #endif -static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if IS_ENABLED(CONFIG_IPV6_MIP6) - { - .type = IPV6_TLV_HAO, - .func = ipv6_dest_hao, - }, -#endif - {-1, NULL} -}; - static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); @@ -304,8 +316,7 @@ fail_and_free: dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - net->ipv6.sysctl.max_dst_opts_cnt)) { + if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -928,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) return false; } +/* IOAM */ + +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) +{ + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; + struct ioam6_hdr *hdr; + + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ + if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + goto ignore; + + /* Truncated Option header */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + if (hdr->opt_len < 2) + goto drop; + + switch (hdr->type) { + case IOAM6_TYPE_PREALLOC: + /* Truncated Pre-allocated Trace header */ + if (hdr->opt_len < 2 + sizeof(*trace)) + 
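/* A worked example of these truncation checks, assuming the 8-byte
 * ioam6_trace_hdr defined by this series: opt_len counts everything
 * after the (opt_type, opt_len) pair, i.e. the remaining 2 bytes of
 * struct ioam6_hdr, sizeof(*trace) = 8 bytes of trace header, and
 * remlen * 4 bytes of pre-allocated data. With remlen == 3, any
 * opt_len below 2 + 8 + 12 = 22 means a malformed option and the
 * packet is dropped.
 */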
goto drop; + + /* Malformed Pre-allocated Trace header */ + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) + goto drop; + + /* Ignore if the IOAM namespace is unknown */ + ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + if (!ns) + goto ignore; + + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + + ioam6_fill_trace_data(skb, ns, trace); + break; + default: + break; + } + +ignore: + return true; + +drop: + kfree_skb(skb); + return false; +} + /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) @@ -994,22 +1059,6 @@ drop: return false; } -static const struct tlvtype_proc tlvprochopopt_lst[] = { - { - .type = IPV6_TLV_ROUTERALERT, - .func = ipv6_hop_ra, - }, - { - .type = IPV6_TLV_JUMBO, - .func = ipv6_hop_jumbo, - }, - { - .type = IPV6_TLV_CALIPSO, - .func = ipv6_hop_calipso, - }, - { -1, } -}; - int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -1035,8 +1084,7 @@ fail_and_free: goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb, - net->ipv6.sysctl.max_hbh_opts_cnt)) { + if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c new file mode 100644 index 000000000000..5e8961004832 --- /dev/null +++ b/net/ipv6/ioam6.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/ioam6.h> +#include <linux/ioam6_genl.h> +#include <linux/rhashtable.h> + +#include <net/addrconf.h> +#include <net/genetlink.h> +#include <net/ioam6.h> + +static void ioam6_ns_release(struct ioam6_namespace *ns) +{ + kfree_rcu(ns, rcu); +} + +static void ioam6_sc_release(struct ioam6_schema *sc) +{ + kfree_rcu(sc, rcu); +} + +static void ioam6_free_ns(void *ptr, void *arg) +{ + struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; + + if (ns) + ioam6_ns_release(ns); +} + +static void ioam6_free_sc(void *ptr, void *arg) +{ + struct ioam6_schema *sc = (struct ioam6_schema *)ptr; + + if (sc) + ioam6_sc_release(sc); +} + +static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_namespace *ns = obj; + + return (ns->id != *(__be16 *)arg->key); +} + +static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_schema *sc = obj; + + return (sc->id != *(u32 *)arg->key); +} + +static const struct rhashtable_params rht_ns_params = { + .key_len = sizeof(__be16), + .key_offset = offsetof(struct ioam6_namespace, id), + .head_offset = offsetof(struct ioam6_namespace, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_ns_cmpfn, +}; + +static const struct rhashtable_params rht_sc_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct ioam6_schema, id), + .head_offset = offsetof(struct ioam6_schema, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_sc_cmpfn, +}; + +static struct genl_family ioam6_genl_family; + +static const struct nla_policy ioam6_genl_policy_addns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, + [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, +}; + +static const struct nla_policy 
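/* Each genetlink command in this file gets its own policy array; the
 * ops table further down pairs every policy with
 * .maxattr = ARRAY_SIZE(policy) - 1, so attributes beyond the policy
 * are rejected at parse time. (This only summarises the pattern used
 * below.)
 */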
ioam6_genl_policy_delns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, +}; + +static const struct nla_policy ioam6_genl_policy_addsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, + .len = IOAM6_MAX_SCHEMA_DATA_LEN }, +}; + +static const struct nla_policy ioam6_genl_policy_delsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy ioam6_genl_policy_ns_sc[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, +}; + +static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + u64 data64; + u32 data32; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (ns) { + err = -EEXIST; + goto out_unlock; + } + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) { + err = -ENOMEM; + goto out_unlock; + } + + ns->id = id; + + if (!info->attrs[IOAM6_ATTR_NS_DATA]) + data32 = IOAM6_U32_UNAVAILABLE; + else + data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]); + + if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE]) + data64 = IOAM6_U64_UNAVAILABLE; + else + data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]); + + ns->data = cpu_to_be32(data32); + ns->data_wide = cpu_to_be64(data64); + + err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + kfree(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + sc = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + goto out_unlock; + + if (sc) + rcu_assign_pointer(sc->ns, NULL); + + ioam6_ns_release(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, + u32 portid, + u32 seq, + u32 flags, + struct sk_buff *skb, + u8 cmd) +{ + struct ioam6_schema *sc; + u64 data64; + u32 data32; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + data32 = be32_to_cpu(ns->data); + data64 = be64_to_cpu(ns->data_wide); + + if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || + (data32 != IOAM6_U32_UNAVAILABLE && + nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || + (data64 != IOAM6_U64_UNAVAILABLE && + nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, + data64, IOAM6_ATTR_PAD))) + goto nla_put_failure; + + rcu_read_lock(); + + sc = rcu_dereference(ns->schema); + if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + 
+nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpns_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->namespaces, iter); + + return 0; +} + +static int ioam6_genl_dumpns_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_namespace *ns; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + ns = rhashtable_walk_next(iter); + + if (IS_ERR(ns)) { + if (PTR_ERR(ns) == -EAGAIN) + continue; + err = PTR_ERR(ns); + goto done; + } else if (!ns) { + break; + } + + err = __ioam6_genl_dumpns_element(ns, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_NAMESPACES); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + int len, len_aligned, err; + struct ioam6_schema *sc; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (sc) { + err = -EEXIST; + goto out_unlock; + } + + len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); + len_aligned = ALIGN(len, 4); + + sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); + if (!sc) { + err = -ENOMEM; + goto out_unlock; + } + + sc->id = id; + sc->len = len_aligned; + sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); + nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); + + err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto free_sc; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +free_sc: + kfree(sc); + goto out_unlock; +} + +static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + int err; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + + ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto out_unlock; + + if (ns) + rcu_assign_pointer(ns->schema, NULL); + + ioam6_sc_release(sc); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + struct ioam6_namespace *ns; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if 
(!hdr) + return -ENOMEM; + + if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || + nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) + goto nla_put_failure; + + rcu_read_lock(); + + ns = rcu_dereference(sc->ns); + if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->schemas, iter); + + return 0; +} + +static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_schema *sc; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + sc = rhashtable_walk_next(iter); + + if (IS_ERR(sc)) { + if (PTR_ERR(sc) == -EAGAIN) + continue; + err = PTR_ERR(sc); + goto done; + } else if (!sc) { + break; + } + + err = __ioam6_genl_dumpsc_element(sc, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_SCHEMAS); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_namespace *ns, *ns_ref; + struct ioam6_schema *sc, *sc_ref; + struct ioam6_pernet_data *nsdata; + __be16 ns_id; + u32 sc_id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID] || + (!info->attrs[IOAM6_ATTR_SC_ID] && + !info->attrs[IOAM6_ATTR_SC_NONE])) + return -EINVAL; + + ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + if (info->attrs[IOAM6_ATTR_SC_NONE]) { + sc = NULL; + } else { + sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, + rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + } + + sc_ref = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + if (sc_ref) + rcu_assign_pointer(sc_ref->ns, NULL); + rcu_assign_pointer(ns->schema, sc); + + if (sc) { + ns_ref = rcu_dereference_protected(sc->ns, + lockdep_is_held(&nsdata->lock)); + if (ns_ref) + rcu_assign_pointer(ns_ref->schema, NULL); + rcu_assign_pointer(sc->ns, ns); + } + + err = 0; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static const struct genl_ops ioam6_genl_ops[] = { + { + .cmd = IOAM6_CMD_ADD_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delns, + .flags = 
GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_NAMESPACES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpns_start, + .dumpit = ioam6_genl_dumpns, + .done = ioam6_genl_dumpns_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_ADD_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_SCHEMAS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpsc_start, + .dumpit = ioam6_genl_dumpsc, + .done = ioam6_genl_dumpsc_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_NS_SET_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_ns_set_schema, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_ns_sc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, + }, +}; + +static struct genl_family ioam6_genl_family __ro_after_init = { + .name = IOAM6_GENL_NAME, + .version = IOAM6_GENL_VERSION, + .netnsok = true, + .parallel_ops = true, + .ops = ioam6_genl_ops, + .n_ops = ARRAY_SIZE(ioam6_genl_ops), + .module = THIS_MODULE, +}; + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); +} + +static void __ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace, + struct ioam6_schema *sc, + u8 sclen) +{ + struct __kernel_sock_timeval ts; + u64 raw64; + u32 raw32; + u16 raw16; + u8 *data; + u8 byte; + + data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + + /* hop_lim and node_id */ + if (trace->type.bit0) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + + *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); + data += sizeof(__be32); + } + + /* ingress_if_id and egress_if_id */ + if (trace->type.bit1) { + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + } + + /* timestamp seconds */ + if (trace->type.bit2) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + skb_get_new_timestamp(skb, &ts); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + } + data += sizeof(__be32); + } + + /* timestamp subseconds */ + if (trace->type.bit3) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + } + data += 
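/* The write cursor computed at the top of this function is plain
 * arithmetic over the pre-allocated area:
 *
 *   data = trace->data + remlen * 4 - nodelen * 4 - sclen * 4
 *
 * so nodes fill the buffer from the end towards the start. A worked
 * example with hypothetical values: remlen = 6, nodelen = 2, no
 * schema (sclen = 0) puts this node's 8 bytes at offsets 16..23 of
 * the 24-byte area, after which ioam6_fill_trace_data() lowers remlen
 * to 4 for the next hop.
 */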
sizeof(__be32); + } + + /* transit delay */ + if (trace->type.bit4) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* namespace data */ + if (trace->type.bit5) { + *(__be32 *)data = ns->data; + data += sizeof(__be32); + } + + /* queue depth */ + if (trace->type.bit6) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* checksum complement */ + if (trace->type.bit7) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* hop_lim and node_id (wide) */ + if (trace->type.bit8) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + + *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); + data += sizeof(__be64); + } + + /* ingress_if_id and egress_if_id (wide) */ + if (trace->type.bit9) { + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + } + + /* namespace data (wide) */ + if (trace->type.bit10) { + *(__be64 *)data = ns->data_wide; + data += sizeof(__be64); + } + + /* buffer occupancy */ + if (trace->type.bit11) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* opaque state snapshot */ + if (trace->type.bit22) { + if (!sc) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); + } else { + *(__be32 *)data = sc->hdr; + data += sizeof(__be32); + + memcpy(data, sc->data, sc->len); + } + } +} + +/* called with rcu_read_lock() */ +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_schema *sc; + u8 sclen = 0; + + /* Skip if Overflow flag is set OR + * if an unknown type (bit 12-21) is set + */ + if (trace->overflow || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) { + return; + } + + /* NodeLen does not include Opaque State Snapshot length. 
We need to + * take it into account if the corresponding bit is set (bit 22) and + * if the current IOAM namespace has an active schema attached to it + */ + sc = rcu_dereference(ns->schema); + if (trace->type.bit22) { + sclen = sizeof_field(struct ioam6_schema, hdr) / 4; + + if (sc) + sclen += sc->len / 4; + } + + /* If there is no space remaining, we set the Overflow flag and we + * skip without filling the trace + */ + if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { + trace->overflow = 1; + return; + } + + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + trace->remlen -= trace->nodelen + sclen; +} + +static int __net_init ioam6_net_init(struct net *net) +{ + struct ioam6_pernet_data *nsdata; + int err = -ENOMEM; + + nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); + if (!nsdata) + goto out; + + mutex_init(&nsdata->lock); + net->ipv6.ioam6_data = nsdata; + + err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); + if (err) + goto free_nsdata; + + err = rhashtable_init(&nsdata->schemas, &rht_sc_params); + if (err) + goto free_rht_ns; + +out: + return err; +free_rht_ns: + rhashtable_destroy(&nsdata->namespaces); +free_nsdata: + kfree(nsdata); + net->ipv6.ioam6_data = NULL; + goto out; +} + +static void __net_exit ioam6_net_exit(struct net *net) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); + rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); + + kfree(nsdata); +} + +static struct pernet_operations ioam6_net_ops = { + .init = ioam6_net_init, + .exit = ioam6_net_exit, +}; + +int __init ioam6_init(void) +{ + int err = register_pernet_subsys(&ioam6_net_ops); + if (err) + goto out; + + err = genl_register_family(&ioam6_genl_family); + if (err) + goto out_unregister_pernet_subsys; + +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + err = ioam6_iptunnel_init(); + if (err) + goto out_unregister_genl; +#endif + + pr_info("In-situ OAM (IOAM) with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL +out_unregister_genl: + genl_unregister_family(&ioam6_genl_family); +#endif +out_unregister_pernet_subsys: + unregister_pernet_subsys(&ioam6_net_ops); + goto out; +} + +void ioam6_exit(void) +{ +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + ioam6_iptunnel_exit(); +#endif + genl_unregister_family(&ioam6_genl_family); + unregister_pernet_subsys(&ioam6_net_ops); +} diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c new file mode 100644 index 000000000000..f9ee04541c17 --- /dev/null +++ b/net/ipv6/ioam6_iptunnel.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM Lightweight Tunnel implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/netlink.h> +#include <linux/in6.h> +#include <linux/ioam6.h> +#include <linux/ioam6_iptunnel.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <net/ioam6.h> + +#define IOAM6_MASK_SHORT_FIELDS 0xff100000 +#define IOAM6_MASK_WIDE_FIELDS 0xe00000 + +struct ioam6_lwt_encap { + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; +} __packed; + +struct ioam6_lwt { + struct ioam6_lwt_encap tuninfo; +}; + +static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) +{ + return (struct ioam6_lwt *)lwt->data; +} + +static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state 
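/* This lightweight tunnel is keyed by LWTUNNEL_ENCAP_IOAM6, so the
 * encap is attached per route from userspace. A sketch with an
 * iproute2 new enough to know the ioam6 encap (syntax and values are
 * illustrative, not taken from this patch):
 *
 *   ip -6 route add db01::/64 encap ioam6 trace type 0x800000 \
 *           ns 1 size 12 dev eth0
 *
 * where "type" maps to trace->type_be32, "ns" to the IOAM namespace
 * id, and "size" to the pre-allocated data area (remlen * 4 bytes).
 */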
*lwt) +{ + return &ioam6_lwt_state(lwt)->tuninfo; +} + +static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +{ + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); +} + +static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), +}; + +static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_trace_hdr *data; + struct nlattr *nla; + int len; + + len = sizeof(*trace); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, trace, len); + + return 0; +} + +static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) +{ + u32 fields; + + if (!trace->type_be32 || !trace->remlen || + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + return false; + + trace->nodelen = 0; + fields = be32_to_cpu(trace->type_be32); + + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return true; +} + +static int ioam6_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_lwt_encap *tuninfo; + struct ioam6_trace_hdr *trace; + struct lwtunnel_state *s; + int len_aligned; + int len, err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, + ioam6_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[IOAM6_IPTUNNEL_TRACE]) { + NL_SET_ERR_MSG(extack, "missing trace"); + return -EINVAL; + } + + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); + if (!ioam6_validate_trace_hdr(trace)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], + "invalid trace validation"); + return -EINVAL; + } + + len = sizeof(*tuninfo) + trace->remlen * 4; + len_aligned = ALIGN(len, 8); + + s = lwtunnel_state_alloc(len_aligned); + if (!s) + return -ENOMEM; + + tuninfo = ioam6_lwt_info(s); + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + tuninfo->pad[0] = IPV6_TLV_PADN; + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) + + trace->remlen * 4; + + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); + + len = len_aligned - len; + if (len == 1) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; + } else if (len > 0) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + } + + s->type = LWTUNNEL_ENCAP_IOAM6; + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = s; + + return 0; +} + +static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +{ + struct ioam6_trace_hdr *trace; + struct ipv6hdr *oldhdr, *hdr; + struct ioam6_namespace *ns; + int hdrlen, err; + + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + skb_pull(skb, sizeof(*oldhdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); + + skb_push(skb, sizeof(*oldhdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*oldhdr)); + tuninfo->eh.nexthdr = hdr->nexthdr; + + skb_set_transport_header(skb, 
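/* ioam6_validate_trace_hdr() above derives nodelen from the trace
 * type bitmap: IOAM6_MASK_SHORT_FIELDS covers the 4-byte fields
 * (trace bits 0-7 and 11), IOAM6_MASK_WIDE_FIELDS the 8-byte ones
 * (bits 8-10). For example, a type of 0xC0000000 - hop_limit/node_id
 * plus the interface ids, an illustrative value - selects two short
 * fields and no wide ones, giving nodelen = 2, i.e. 8 bytes consumed
 * per transit node.
 */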
sizeof(*hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); + + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace); + + return 0; +} + +static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + int err = -EINVAL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + /* Only for packets we send and + * that do not contain a Hop-by-Hop yet + */ + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); + if (unlikely(err)) + goto drop; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + goto drop; + +out: + return lwt->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int ioam6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) + return -EMSGSIZE; + + return 0; +} + +static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + return nla_total_size(sizeof(*trace)); +} + +static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); + + return (a_hdr->namespace_id != b_hdr->namespace_id); +} + +static const struct lwtunnel_encap_ops ioam6_iptun_ops = { + .build_state = ioam6_build_state, + .output = ioam6_output, + .fill_encap = ioam6_fill_encap_info, + .get_encap_size = ioam6_encap_nlsize, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init ioam6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} + +void ioam6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2d650dc24349..a8f118e469b7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2449,8 +2449,8 @@ int __init fib6_init(void) int ret = -ENOMEM; fib6_node_kmem = kmem_cache_create("fib6_nodes", - sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + sizeof(struct fib6_node), 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!fib6_node_kmem) goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc224f917bbd..3ad201d372d8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1244,8 +1244,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, memcpy(u->name, p->name, sizeof(u->name)); } -static int ip6gre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ip6gre_tunnel_siocdevprivate(struct net_device *dev, + struct ifreq *ifr, void __user *data, + int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -1259,7 +1260,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1270,7 +1271,7 @@ static int 
ip6gre_tunnel_ioctl(struct net_device *dev, } memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; @@ -1281,7 +1282,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, goto done; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -EINVAL; @@ -1318,7 +1319,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); @@ -1331,7 +1332,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, if (dev == ign->fb_tunnel_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); @@ -1398,7 +1399,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_init = ip6gre_tunnel_init, .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, - .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e6ca9ad6812..12f985f43bcc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); - int delta = hh_len - skb_headroom(skb); - const struct in6_addr *nexthop; + const struct in6_addr *daddr, *nexthop; + struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. 
*/ - if (unlikely(delta > 0) && dev->header_ops) { - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } - skb = nskb; - } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; - } + if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + skb = skb_expand_head(skb, hh_len); if (!skb) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOMEM; } } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - + hdr = ipv6_hdr(skb); + daddr = &hdr->daddr; + if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || - ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr))) { + ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing @@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); - if (ipv6_hdr(skb)->hop_limit == 0) { + if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); - - if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= - IPV6_ADDR_SCOPE_NODELOCAL && + if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; @@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); - neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); @@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_unlock_bh(); - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = 
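/* skb_expand_head() subsumes the open-coded sequence being removed
 * here: it reallocates headroom, deals with shared skbs and socket
 * ownership transfer, and frees the skb itself on failure, so callers
 * are left with a single NULL check, as in the new hunks:
 *
 *   skb = skb_expand_head(skb, head_room);
 *   if (!skb)
 *           return -ENOBUFS;   // skb already consumed
 */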
skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + if (unlikely(head_room > skb_headroom(skb))) { + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOBUFS; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (opt) { @@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUT, skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dst->dev, + net, (struct sock *)sk, skb, NULL, dev, dst_output); } - skb->dev = dst->dev; + skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -608,7 +584,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = ip6_dst_mtu_forward(dst); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 322698d9fcf4..20a67efda47f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) } /** - * ip6_tnl_ioctl - configure ipv6 tunnels from userspace + * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: @@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) **/ static int -ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm p; @@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; - } break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: @@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1669,7 +1670,7 @@ 
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else { @@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); @@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_start_xmit, - .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_siocdevprivate = ip6_tnl_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d048e21abbb..1d8e3ffa225d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) } /** - * vti6_ioctl - configure vti6 tunnels from userspace + * vti6_siocdevprivate - configure vti6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: - * vti6_ioctl() is used for managing vti6 tunnels + * vti6_siocdevprivate() is used for managing vti6 tunnels * from userspace. * * The possible commands are the following: @@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) * %-ENODEV if attempting to change or delete a nonexisting device **/ static int -vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: @@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != 0) @@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (t) { err = 0; vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else @@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; vti6_parm_from_user(&p1, &p); @@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = { 
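/* Same conversion as the other tunnel drivers in this series:
 * SIOCDEVPRIVATE-range ioctls now reach the driver via
 * ndo_siocdevprivate, which passes the user pointer explicitly
 * instead of having drivers pull it out of ifr->ifr_ifru.ifru_data:
 *
 *   int (*ndo_siocdevprivate)(struct net_device *dev, struct ifreq *ifr,
 *                             void __user *data, int cmd);
 */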
.ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, - .ndo_do_ioctl = vti6_ioctl, + .ndo_siocdevprivate = vti6_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 06b0d2c329b9..36ed9efb8825 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); + dev_hold(reg_dev); read_unlock(&mrt_lock); if (!reg_dev) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a6804a7e34c1..e4bdb09c5586 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; - ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; @@ -234,7 +234,7 @@ out_free_gsf: static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; @@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; @@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_p; ret = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); @@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, static int ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; @@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; 
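/* The surrounding hunks are part of the tree-wide switch to real
 * flexible array members: gf_slist becomes gf_slist_flex, and
 * open-coded size macros such as IP6_SFLSIZE() give way to
 * struct_size(), which also guards the multiplication against
 * overflow. Roughly:
 *
 *   struct_size(psl, sl_addr, count)
 *       == sizeof(*psl) + count * sizeof(psl->sl_addr[0])
 */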
lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); + err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); release_sock(sk); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54ec163fbafa..cd951faa2fac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } psl = newpsl; @@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } if (gsf->gf_numsrc) { - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + gsf->gf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); - sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); @@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); @@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b6ddf23d3833..6cf4bb89ca69 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3201,25 +3201,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { - struct inet6_dev *idev; - unsigned int mtu; - - mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) - goto out; - - mtu = IPV6_MIN_MTU; - - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - -out: - mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip6_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ip6_mtu); @@ -3644,8 +3626,7 @@ out: if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; - if (dev) - dev_put(dev); + dev_put(dev); } return err; @@ -6638,7 +6619,7 @@ int __init ip6_route_init(void) ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!ip6_dst_ops_template.kmem_cachep) goto out; diff --git a/net/ipv6/sit.c 
b/net/ipv6/sit.c index df5bea818410..ef0c7a7c18e2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; @@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? - kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : + kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; rcu_read_lock(); @@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * For root users, retry allocating enough memory for * the answer. */ - kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); + kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT | + __GFP_NOWARN); if (!kp) { ret = -ENOMEM; goto out; @@ -453,8 +453,8 @@ out: return err; } -static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static int ipip6_tunnel_prl_ctl(struct net_device *dev, + struct ip_tunnel_prl __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl prl; @@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + if (copy_from_user(&prl, data, sizeof(prl))) return -EFAULT; switch (cmd) { @@ -1197,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } static int -ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; struct ip_tunnel_parm p; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } @@ -1215,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + if (copy_to_user(data, &ip6rd, sizeof(ip6rd))) return -EFAULT; return 0; } static int -ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data, + int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; @@ -1229,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + if (copy_from_user(&ip6rd, data, sizeof(ip6rd))) return -EFAULT; if (cmd != SIOCDEL6RD) { @@ -1368,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { switch (cmd) { case SIOCGETTUNNEL: 
case SIOCADDTUNNEL: case SIOCCHGTUNNEL: case SIOCDELTUNNEL: - return ip_tunnel_ioctl(dev, ifr, cmd); + return ip_tunnel_siocdevprivate(dev, ifr, data, cmd); case SIOCGETPRL: - return ipip6_tunnel_get_prl(dev, ifr); + return ipip6_tunnel_get_prl(dev, data); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - return ipip6_tunnel_prl_ctl(dev, ifr, cmd); + return ipip6_tunnel_prl_ctl(dev, data, cmd); #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: - return ipip6_tunnel_get6rd(dev, ifr); + return ipip6_tunnel_get6rd(dev, data); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - return ipip6_tunnel_6rdctl(dev, ifr, cmd); + return ipip6_tunnel_6rdctl(dev, data, cmd); #endif default: return -EINVAL; @@ -1399,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, - .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d7cf26f730d7..d53dd142bf87 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +#include <linux/ioam6.h> static int two = 2; static int three = 3; @@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static u32 ioam6_id_max = IOAM6_DEFAULT_ID; +static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "ioam6_id", + .data = &init_net.ipv6.sysctl.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra2 = &ioam6_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &init_net.ipv6.sysctl.ioam6_id_wide, + .maxlen = sizeof(u64), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra2 = &ioam6_id_wide_max, + }, { } }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ac5cadd02cfa..3086f4a6ae68 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -224,8 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - if (llc->dev) - dev_put(llc->dev); + dev_put(llc->dev); sock_put(sk); llc_sk_free(sk); out: @@ -363,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) } else llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + dev_hold(llc->dev); rcu_read_unlock(); if (!llc->dev) goto out; diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig new file mode 100644 index 000000000000..2cdf3d0a28c9 --- /dev/null +++ b/net/mctp/Kconfig @@ -0,0 +1,13 @@ + +menuconfig MCTP + depends on NET + tristate "MCTP core protocol support" + help + Management Component Transport Protocol (MCTP) is an in-system + protocol for communicating between management controllers and + their managed devices (peripherals, host processors, etc.). The + protocol is defined by DMTF specification DSP0236. + + This option enables core MCTP support. 
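As an illustrative sketch of how the new socket API is used (not itself a hunk of this diff): a minimal userspace sender, assuming the sockaddr_mctp definitions this series adds to the uapi <linux/mctp.h>; the destination EID (9) and message type (1) are placeholder values, and CAP_NET_RAW is required on the sending side.

#include <sys/socket.h>
#include <linux/mctp.h>

int mctp_send_example(void)
{
	struct sockaddr_mctp addr = { 0 };
	unsigned char buf[2] = { 0xaa, 0xbb };	/* payload; the type byte is prepended from smctp_type */
	int sd;

	sd = socket(AF_MCTP, SOCK_DGRAM, 0);
	if (sd < 0)
		return -1;

	addr.smctp_family = AF_MCTP;
	addr.smctp_network = MCTP_NET_ANY;	/* resolved to the default net */
	addr.smctp_addr.s_addr = 9;		/* placeholder destination EID */
	addr.smctp_type = 1;			/* placeholder message type */
	addr.smctp_tag = MCTP_TAG_OWNER;	/* kernel allocates the tag */

	return sendto(sd, buf, sizeof(buf), 0,
		      (struct sockaddr *)&addr, sizeof(addr));
}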
For communicating with other
+	  devices, you'll want to enable a driver for a specific hardware
+	  channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644
index 000000000000..0171333384d7
--- /dev/null
+++ b/net/mctp/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644
index 000000000000..a9526ac29dff
--- /dev/null
+++ b/net/mctp/af_mctp.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* socket implementation */
+
+static int mctp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		sock->sk = NULL;
+		sk->sk_prot->close(sk, 0);
+	}
+
+	return 0;
+}
+
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+	struct sock *sk = sock->sk;
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+	struct sockaddr_mctp *smctp;
+	int rc;
+
+	if (addrlen < sizeof(*smctp))
+		return -EINVAL;
+
+	if (addr->sa_family != AF_MCTP)
+		return -EAFNOSUPPORT;
+
+	if (!capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	/* it's a valid sockaddr for MCTP, cast and do protocol checks */
+	smctp = (struct sockaddr_mctp *)addr;
+
+	lock_sock(sk);
+
+	/* TODO: allow rebind */
+	if (sk_hashed(sk)) {
+		rc = -EADDRINUSE;
+		goto out_release;
+	}
+	msk->bind_net = smctp->smctp_network;
+	msk->bind_addr = smctp->smctp_addr.s_addr;
+	msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+	rc = sk->sk_prot->hash(sk);
+
+out_release:
+	release_sock(sk);
+
+	return rc;
+}
+
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+	const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+	int rc, addrlen = msg->msg_namelen;
+	struct sock *sk = sock->sk;
+	struct mctp_skb_cb *cb;
+	struct mctp_route *rt;
+	struct sk_buff *skb;
+
+	if (addr) {
+		if (addrlen < sizeof(struct sockaddr_mctp))
+			return -EINVAL;
+		if (addr->smctp_family != AF_MCTP)
+			return -EINVAL;
+		if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+			return -EINVAL;
+
+	} else {
+		/* TODO: connect()ed sockets */
+		return -EDESTADDRREQ;
+	}
+
+	if (!capable(CAP_NET_RAW))
+		return -EACCES;
+
+	if (addr->smctp_network == MCTP_NET_ANY)
+		addr->smctp_network = mctp_default_net(sock_net(sk));
+
+	rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+			       addr->smctp_addr.s_addr);
+	if (!rt)
+		return -EHOSTUNREACH;
+
+	skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+				  msg->msg_flags & MSG_DONTWAIT, &rc);
+	if (!skb)
+		return rc;
+
+	skb_reserve(skb, hlen);
+
+	/* set type as first byte in payload */
+	*(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+	rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+	if (rc < 0) {
+		kfree_skb(skb);
+		return rc;
+	}
+
+	/* set up cb */
+	cb = __mctp_cb(skb);
+	cb->net = addr->smctp_network;
+
+	rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+			       addr->smctp_tag);
+
+	return rc ?
: len; +} + +static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); + struct sock *sk = sock->sk; + struct sk_buff *skb; + size_t msglen; + u8 type; + int rc; + + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) + return -EOPNOTSUPP; + + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); + if (!skb) + return rc; + + if (!skb->len) { + rc = 0; + goto out_free; + } + + /* extract message type, remove from data */ + type = *((u8 *)skb->data); + msglen = skb->len - 1; + + if (len < msglen) + msg->msg_flags |= MSG_TRUNC; + else + len = msglen; + + rc = skb_copy_datagram_msg(skb, 1, msg, len); + if (rc < 0) + goto out_free; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (addr) { + struct mctp_skb_cb *cb = mctp_cb(skb); + /* TODO: expand mctp_skb_cb for header fields? */ + struct mctp_hdr *hdr = mctp_hdr(skb); + + addr = msg->msg_name; + addr->smctp_family = AF_MCTP; + addr->smctp_network = cb->net; + addr->smctp_addr.s_addr = hdr->src; + addr->smctp_type = type; + addr->smctp_tag = hdr->flags_seq_tag & + (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + msg->msg_namelen = sizeof(*addr); + } + + rc = len; + + if (flags & MSG_TRUNC) + rc = msglen; + +out_free: + skb_free_datagram(sk, skb); + return rc; +} + +static int mctp_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + return -EINVAL; +} + +static int mctp_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EINVAL; +} + +static const struct proto_ops mctp_dgram_ops = { + .family = PF_MCTP, + .release = mctp_release, + .bind = mctp_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .gettstamp = sock_gettstamp, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = mctp_setsockopt, + .getsockopt = mctp_getsockopt, + .sendmsg = mctp_sendmsg, + .recvmsg = mctp_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int mctp_sk_init(struct sock *sk) +{ + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + + INIT_HLIST_HEAD(&msk->keys); + return 0; +} + +static void mctp_sk_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int mctp_sk_hash(struct sock *sk) +{ + struct net *net = sock_net(sk); + + mutex_lock(&net->mctp.bind_lock); + sk_add_node_rcu(sk, &net->mctp.binds); + mutex_unlock(&net->mctp.bind_lock); + + return 0; +} + +static void mctp_sk_unhash(struct sock *sk) +{ + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + struct net *net = sock_net(sk); + struct mctp_sk_key *key; + struct hlist_node *tmp; + unsigned long flags; + + /* remove from any type-based binds */ + mutex_lock(&net->mctp.bind_lock); + sk_del_node_init_rcu(sk); + mutex_unlock(&net->mctp.bind_lock); + + /* remove tag allocations */ + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { + hlist_del_rcu(&key->sklist); + hlist_del_rcu(&key->hlist); + + spin_lock(&key->reasm_lock); + if (key->reasm_head) + kfree_skb(key->reasm_head); + key->reasm_head = NULL; + key->reasm_dead = true; + spin_unlock(&key->reasm_lock); + + kfree_rcu(key, rcu); + } + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + synchronize_rcu(); +} + +static struct proto mctp_proto = { + .name = 
"MCTP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct mctp_sock), + .init = mctp_sk_init, + .close = mctp_sk_close, + .hash = mctp_sk_hash, + .unhash = mctp_sk_unhash, +}; + +static int mctp_pf_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + const struct proto_ops *ops; + struct proto *proto; + struct sock *sk; + int rc; + + if (protocol) + return -EPROTONOSUPPORT; + + /* only datagram sockets are supported */ + if (sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + proto = &mctp_proto; + ops = &mctp_dgram_ops; + + sock->state = SS_UNCONNECTED; + sock->ops = ops; + + sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, proto, kern); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + + rc = 0; + if (sk->sk_prot->init) + rc = sk->sk_prot->init(sk); + + if (rc) + goto err_sk_put; + + return 0; + +err_sk_put: + sock_orphan(sk); + sock_put(sk); + return rc; +} + +static struct net_proto_family mctp_pf = { + .family = PF_MCTP, + .create = mctp_pf_create, + .owner = THIS_MODULE, +}; + +static __init int mctp_init(void) +{ + int rc; + + /* ensure our uapi tag definitions match the header format */ + BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO); + BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK); + + pr_info("mctp: management component transport protocol core\n"); + + rc = sock_register(&mctp_pf); + if (rc) + return rc; + + rc = proto_register(&mctp_proto, 0); + if (rc) + goto err_unreg_sock; + + rc = mctp_routes_init(); + if (rc) + goto err_unreg_proto; + + rc = mctp_neigh_init(); + if (rc) + goto err_unreg_proto; + + mctp_device_init(); + + return 0; + +err_unreg_proto: + proto_unregister(&mctp_proto); +err_unreg_sock: + sock_unregister(PF_MCTP); + + return rc; +} + +static __exit void mctp_exit(void) +{ + mctp_device_exit(); + mctp_neigh_exit(); + mctp_routes_exit(); + proto_unregister(&mctp_proto); + sock_unregister(PF_MCTP); +} + +module_init(mctp_init); +module_exit(mctp_exit); + +MODULE_DESCRIPTION("MCTP core"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>"); + +MODULE_ALIAS_NETPROTO(PF_MCTP); diff --git a/net/mctp/device.c b/net/mctp/device.c new file mode 100644 index 000000000000..b9f38e765f61 --- /dev/null +++ b/net/mctp/device.c @@ -0,0 +1,423 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - device implementation. 
+ * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include <linux/if_link.h> +#include <linux/mctp.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include <linux/rtnetlink.h> + +#include <net/addrconf.h> +#include <net/netlink.h> +#include <net/mctp.h> +#include <net/mctpdevice.h> +#include <net/sock.h> + +struct mctp_dump_cb { + int h; + int idx; + size_t a_idx; +}; + +/* unlocked: caller must hold rcu_read_lock */ +struct mctp_dev *__mctp_dev_get(const struct net_device *dev) +{ + return rcu_dereference(dev->mctp_ptr); +} + +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) +{ + return rtnl_dereference(dev->mctp_ptr); +} + +static void mctp_dev_destroy(struct mctp_dev *mdev) +{ + struct net_device *dev = mdev->dev; + + dev_put(dev); + kfree_rcu(mdev, rcu); +} + +static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, + struct mctp_dev *mdev, mctp_eid_t eid) +{ + struct ifaddrmsg *hdr; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ifa_family = AF_MCTP; + hdr->ifa_prefixlen = 0; + hdr->ifa_flags = 0; + hdr->ifa_scope = 0; + hdr->ifa_index = mdev->dev->ifindex; + + if (nla_put_u8(skb, IFA_LOCAL, eid)) + goto cancel; + + if (nla_put_u8(skb, IFA_ADDRESS, eid)) + goto cancel; + + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct mctp_dump_cb *mcb = (void *)cb->ctx; + int rc = 0; + + for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) { + rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]); + if (rc < 0) + break; + } + + return rc; +} + +static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct mctp_dump_cb *mcb = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct hlist_head *head; + struct net_device *dev; + struct ifaddrmsg *hdr; + struct mctp_dev *mdev; + int ifindex; + int idx, rc; + + hdr = nlmsg_data(cb->nlh); + // filter by ifindex if requested + ifindex = hdr->ifa_index; + + rcu_read_lock(); + for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) { + idx = 0; + head = &net->dev_index_head[mcb->h]; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx >= mcb->idx && + (ifindex == 0 || ifindex == dev->ifindex)) { + mdev = __mctp_dev_get(dev); + if (mdev) { + rc = mctp_dump_dev_addrinfo(mdev, + skb, cb); + // Error indicates full buffer, this + // callback will get retried. 
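+					// The h/idx/a_idx cursors saved in
+					// cb->ctx let that retry resume at the
+					// device and address we stopped on.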
+ if (rc < 0) + goto out; + } + } + idx++; + // reset for next iteration + mcb->a_idx = 0; + } + } +out: + rcu_read_unlock(); + mcb->idx = idx; + + return skb->len; +} + +static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { + [IFA_ADDRESS] = { .type = NLA_U8 }, + [IFA_LOCAL] = { .type = NLA_U8 }, +}; + +static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX + 1]; + struct net_device *dev; + struct mctp_addr *addr; + struct mctp_dev *mdev; + struct ifaddrmsg *ifm; + unsigned long flags; + u8 *tmp_addrs; + int rc; + + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy, + extack); + if (rc < 0) + return rc; + + ifm = nlmsg_data(nlh); + + if (tb[IFA_LOCAL]) + addr = nla_data(tb[IFA_LOCAL]); + else if (tb[IFA_ADDRESS]) + addr = nla_data(tb[IFA_ADDRESS]); + else + return -EINVAL; + + /* find device */ + dev = __dev_get_by_index(net, ifm->ifa_index); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + if (!mctp_address_ok(addr->s_addr)) + return -EINVAL; + + /* Prevent duplicates. Under RTNL so don't need to lock for reading */ + if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs)) + return -EEXIST; + + tmp_addrs = kmalloc(mdev->num_addrs + 1, GFP_KERNEL); + if (!tmp_addrs) + return -ENOMEM; + memcpy(tmp_addrs, mdev->addrs, mdev->num_addrs); + tmp_addrs[mdev->num_addrs] = addr->s_addr; + + /* Lock to write */ + spin_lock_irqsave(&mdev->addrs_lock, flags); + mdev->num_addrs++; + swap(mdev->addrs, tmp_addrs); + spin_unlock_irqrestore(&mdev->addrs_lock, flags); + + kfree(tmp_addrs); + + mctp_route_add_local(mdev, addr->s_addr); + + return 0; +} + +static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX + 1]; + struct net_device *dev; + struct mctp_addr *addr; + struct mctp_dev *mdev; + struct ifaddrmsg *ifm; + unsigned long flags; + u8 *pos; + int rc; + + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy, + extack); + if (rc < 0) + return rc; + + ifm = nlmsg_data(nlh); + + if (tb[IFA_LOCAL]) + addr = nla_data(tb[IFA_LOCAL]); + else if (tb[IFA_ADDRESS]) + addr = nla_data(tb[IFA_ADDRESS]); + else + return -EINVAL; + + /* find device */ + dev = __dev_get_by_index(net, ifm->ifa_index); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + pos = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs); + if (!pos) + return -ENOENT; + + rc = mctp_route_remove_local(mdev, addr->s_addr); + // we can ignore -ENOENT in the case a route was already removed + if (rc < 0 && rc != -ENOENT) + return rc; + + spin_lock_irqsave(&mdev->addrs_lock, flags); + memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs)); + mdev->num_addrs--; + spin_unlock_irqrestore(&mdev->addrs_lock, flags); + + return 0; +} + +static struct mctp_dev *mctp_add_dev(struct net_device *dev) +{ + struct mctp_dev *mdev; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&mdev->addrs_lock); + + mdev->net = mctp_default_net(dev_net(dev)); + + /* associate to net_device */ + rcu_assign_pointer(dev->mctp_ptr, mdev); + dev_hold(dev); + mdev->dev = dev; + + return mdev; +} + +static int mctp_fill_link_af(struct sk_buff *skb, + const struct net_device *dev, u32 ext_filter_mask) +{ + struct mctp_dev *mdev; + + 
mdev = mctp_dev_get_rtnl(dev);
+	if (!mdev)
+		return -ENODATA;
+	if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+				    u32 ext_filter_mask)
+{
+	struct mctp_dev *mdev;
+	unsigned int ret;
+
+	/* caller holds RCU */
+	mdev = __mctp_dev_get(dev);
+	if (!mdev)
+		return 0;
+	ret = nla_total_size(4); /* IFLA_MCTP_NET */
+	return ret;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = {
+	[IFLA_MCTP_NET] = { .type = NLA_U32 },
+};
+
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+			    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_MCTP_MAX + 1];
+	struct mctp_dev *mdev;
+	int rc;
+
+	rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+			      NULL);
+	if (rc)
+		return rc;
+
+	mdev = mctp_dev_get_rtnl(dev);
+	if (!mdev)
+		return 0;
+
+	if (tb[IFLA_MCTP_NET])
+		WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+	return 0;
+}
+
+static void mctp_unregister(struct net_device *dev)
+{
+	struct mctp_dev *mdev;
+
+	mdev = mctp_dev_get_rtnl(dev);
+
+	if (!mdev)
+		return;
+
+	RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+	mctp_route_remove_dev(mdev);
+	mctp_neigh_remove_dev(mdev);
+	kfree(mdev->addrs);
+
+	mctp_dev_destroy(mdev);
+}
+
+static int mctp_register(struct net_device *dev)
+{
+	struct mctp_dev *mdev;
+
+	/* Already registered? */
+	if (rtnl_dereference(dev->mctp_ptr))
+		return 0;
+
+	/* only register specific types; MCTP-specific and loopback for now */
+	if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+		return 0;
+
+	mdev = mctp_add_dev(dev);
+	if (IS_ERR(mdev))
+		return PTR_ERR(mdev);
+
+	return 0;
+}
+
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	int rc;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		rc = mctp_register(dev);
+		if (rc)
+			return notifier_from_errno(rc);
+		break;
+	case NETDEV_UNREGISTER:
+		mctp_unregister(dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = {
+	.family = AF_MCTP,
+	.fill_link_af = mctp_fill_link_af,
+	.get_link_af_size = mctp_get_link_af_size,
+	.set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = {
+	.notifier_call = mctp_dev_notify,
+	.priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+void __init mctp_device_init(void)
+{
+	register_netdevice_notifier(&mctp_dev_nb);
+
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+			     NULL, mctp_dump_addrinfo, 0);
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+			     mctp_rtm_newaddr, NULL, 0);
+	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+			     mctp_rtm_deladdr, NULL, 0);
+	rtnl_af_register(&mctp_af_ops);
+}
+
+void __exit mctp_device_exit(void)
+{
+	rtnl_af_unregister(&mctp_af_ops);
+	rtnl_unregister(PF_MCTP, RTM_DELADDR);
+	rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+	rtnl_unregister(PF_MCTP, RTM_GETADDR);
+
+	unregister_netdevice_notifier(&mctp_dev_nb);
}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644
index 000000000000..90ed2f02d1fb
--- /dev/null
+++ b/net/mctp/neigh.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - neighbour
+ * implementation.
+
+ * This is currently based on a simple neighbour table, with no cache. The
+ * number of neighbours should stay fairly small, so the lookup cost is small.
+ * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include <linux/idr.h> +#include <linux/mctp.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> + +#include <net/mctp.h> +#include <net/mctpdevice.h> +#include <net/netlink.h> +#include <net/sock.h> + +static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid, + enum mctp_neigh_source source, + size_t lladdr_len, const void *lladdr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh; + int rc; + + mutex_lock(&net->mctp.neigh_lock); + if (mctp_neigh_lookup(mdev, eid, NULL) == 0) { + rc = -EEXIST; + goto out; + } + + if (lladdr_len > sizeof(neigh->ha)) { + rc = -EINVAL; + goto out; + } + + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL); + if (!neigh) { + rc = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&neigh->list); + neigh->dev = mdev; + dev_hold(neigh->dev->dev); + neigh->eid = eid; + neigh->source = source; + memcpy(neigh->ha, lladdr, lladdr_len); + + list_add_rcu(&neigh->list, &net->mctp.neighbours); + rc = 0; +out: + mutex_unlock(&net->mctp.neigh_lock); + return rc; +} + +static void __mctp_neigh_free(struct rcu_head *rcu) +{ + struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu); + + dev_put(neigh->dev->dev); + kfree(neigh); +} + +/* Removes all neighbour entries referring to a device */ +void mctp_neigh_remove_dev(struct mctp_dev *mdev) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh, *tmp; + + mutex_lock(&net->mctp.neigh_lock); + list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) { + if (neigh->dev == mdev) { + list_del_rcu(&neigh->list); + /* TODO: immediate RTM_DELNEIGH */ + call_rcu(&neigh->rcu, __mctp_neigh_free); + } + } + + mutex_unlock(&net->mctp.neigh_lock); +} + +// TODO: add a "source" flag so netlink can only delete static neighbours? +static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh, *tmp; + bool dropped = false; + + mutex_lock(&net->mctp.neigh_lock); + list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) { + if (neigh->dev == mdev && neigh->eid == eid) { + list_del_rcu(&neigh->list); + /* TODO: immediate RTM_DELNEIGH */ + call_rcu(&neigh->rcu, __mctp_neigh_free); + dropped = true; + } + } + + mutex_unlock(&net->mctp.neigh_lock); + return dropped ? 
0 : -ENOENT; +} + +static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = { + [NDA_DST] = { .type = NLA_U8 }, + [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, +}; + +static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct mctp_dev *mdev; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX + 1]; + int rc; + mctp_eid_t eid; + void *lladdr; + int lladdr_len; + + rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy, + extack); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "lladdr too large?"); + return rc; + } + + if (!tb[NDA_DST]) { + NL_SET_ERR_MSG(extack, "Neighbour EID must be specified"); + return -EINVAL; + } + + if (!tb[NDA_LLADDR]) { + NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified"); + return -EINVAL; + } + + eid = nla_get_u8(tb[NDA_DST]); + if (!mctp_address_ok(eid)) { + NL_SET_ERR_MSG(extack, "Invalid neighbour EID"); + return -EINVAL; + } + + lladdr = nla_data(tb[NDA_LLADDR]); + lladdr_len = nla_len(tb[NDA_LLADDR]); + + ndm = nlmsg_data(nlh); + + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + if (lladdr_len != dev->addr_len) { + NL_SET_ERR_MSG(extack, "Wrong lladdr length"); + return -EINVAL; + } + + return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC, + lladdr_len, lladdr); +} + +static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NDA_MAX + 1]; + struct net_device *dev; + struct mctp_dev *mdev; + struct ndmsg *ndm; + int rc; + mctp_eid_t eid; + + rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy, + extack); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "incorrect format"); + return rc; + } + + if (!tb[NDA_DST]) { + NL_SET_ERR_MSG(extack, "Neighbour EID must be specified"); + return -EINVAL; + } + eid = nla_get_u8(tb[NDA_DST]); + + ndm = nlmsg_data(nlh); + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + return mctp_neigh_remove(mdev, eid); +} + +static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event, + unsigned int flags, struct mctp_neigh *neigh) +{ + struct net_device *dev = neigh->dev->dev; + struct nlmsghdr *nlh; + struct ndmsg *hdr; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); + if (!nlh) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ndm_family = AF_MCTP; + hdr->ndm_ifindex = dev->ifindex; + hdr->ndm_state = 0; // TODO other state bits? + if (neigh->source == MCTP_NEIGH_STATIC) + hdr->ndm_state |= NUD_PERMANENT; + hdr->ndm_flags = 0; + hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL? 
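+	// NDA_DST carries the one-byte MCTP EID; NDA_LLADDR below is
+	// dev->addr_len bytes of hardware address.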
+ + if (nla_put_u8(skb, NDA_DST, neigh->eid)) + goto cancel; + + if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha)) + goto cancel; + + nlmsg_end(skb, nlh); + + return 0; +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int rc, idx, req_ifindex; + struct mctp_neigh *neigh; + struct ndmsg *ndmsg; + struct { + int idx; + } *cbctx = (void *)cb->ctx; + + ndmsg = nlmsg_data(cb->nlh); + req_ifindex = ndmsg->ndm_ifindex; + + idx = 0; + rcu_read_lock(); + list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) { + if (idx < cbctx->idx) + goto cont; + + rc = 0; + if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex) + rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI, neigh); + + if (rc) + break; +cont: + idx++; + } + rcu_read_unlock(); + + cbctx->idx = idx; + return skb->len; +} + +int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh; + int rc = -EHOSTUNREACH; // TODO: or ENOENT? + + rcu_read_lock(); + list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) { + if (mdev == neigh->dev && eid == neigh->eid) { + if (ret_hwaddr) + memcpy(ret_hwaddr, neigh->ha, + sizeof(neigh->ha)); + rc = 0; + break; + } + } + rcu_read_unlock(); + return rc; +} + +/* namespace registration */ +static int __net_init mctp_neigh_net_init(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + + INIT_LIST_HEAD(&ns->neighbours); + mutex_init(&ns->neigh_lock); + return 0; +} + +static void __net_exit mctp_neigh_net_exit(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + struct mctp_neigh *neigh; + + list_for_each_entry(neigh, &ns->neighbours, list) + call_rcu(&neigh->rcu, __mctp_neigh_free); +} + +/* net namespace implementation */ + +static struct pernet_operations mctp_net_ops = { + .init = mctp_neigh_net_init, + .exit = mctp_neigh_net_exit, +}; + +int __init mctp_neigh_init(void) +{ + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, + mctp_rtm_newneigh, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH, + mctp_rtm_delneigh, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH, + NULL, mctp_rtm_getneigh, 0); + + return register_pernet_subsys(&mctp_net_ops); +} + +void __exit mctp_neigh_exit(void) +{ + unregister_pernet_subsys(&mctp_net_ops); + rtnl_unregister(PF_MCTP, RTM_GETNEIGH); + rtnl_unregister(PF_MCTP, RTM_DELNEIGH); + rtnl_unregister(PF_MCTP, RTM_NEWNEIGH); +} diff --git a/net/mctp/route.c b/net/mctp/route.c new file mode 100644 index 000000000000..b3101375c8e7 --- /dev/null +++ b/net/mctp/route.c @@ -0,0 +1,1099 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - routing + * implementation. + * + * This is currently based on a simple routing table, with no dst cache. The + * number of routes should stay fairly small, so the lookup cost is small. 
+ * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include <linux/idr.h> +#include <linux/mctp.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> + +#include <uapi/linux/if_arp.h> + +#include <net/mctp.h> +#include <net/mctpdevice.h> +#include <net/netlink.h> +#include <net/sock.h> + +static const unsigned int mctp_message_maxlen = 64 * 1024; + +/* route output callbacks */ +static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = mctp_cb(skb); + struct mctp_hdr *mh; + struct sock *sk; + u8 type; + + WARN_ON(!rcu_read_lock_held()); + + /* TODO: look up in skb->cb? */ + mh = mctp_hdr(skb); + + if (!skb_headlen(skb)) + return NULL; + + type = (*(u8 *)skb->data) & 0x7f; + + sk_for_each_rcu(sk, &net->mctp.binds) { + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + + if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) + continue; + + if (msk->bind_type != type) + continue; + + if (msk->bind_addr != MCTP_ADDR_ANY && + msk->bind_addr != mh->dest) + continue; + + return msk; + } + + return NULL; +} + +static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, + mctp_eid_t peer, u8 tag) +{ + if (key->local_addr != local) + return false; + + if (key->peer_addr != peer) + return false; + + if (key->tag != tag) + return false; + + return true; +} + +static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, + mctp_eid_t peer) +{ + struct mctp_sk_key *key, *ret; + struct mctp_hdr *mh; + u8 tag; + + WARN_ON(!rcu_read_lock_held()); + + mh = mctp_hdr(skb); + tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + + ret = NULL; + + hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) { + if (mctp_key_match(key, mh->dest, peer, tag)) { + ret = key; + break; + } + } + + return ret; +} + +static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, + mctp_eid_t local, mctp_eid_t peer, + u8 tag, gfp_t gfp) +{ + struct mctp_sk_key *key; + + key = kzalloc(sizeof(*key), gfp); + if (!key) + return NULL; + + key->peer_addr = peer; + key->local_addr = local; + key->tag = tag; + key->sk = &msk->sk; + spin_lock_init(&key->reasm_lock); + + return key; +} + +static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_sk_key *tmp; + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + + hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { + if (mctp_key_match(tmp, key->local_addr, key->peer_addr, + key->tag)) { + rc = -EEXIST; + break; + } + } + + if (!rc) { + hlist_add_head(&key->hlist, &net->mctp.keys); + hlist_add_head(&key->sklist, &msk->keys); + } + + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + return rc; +} + +/* Must be called with key->reasm_lock, which it will release. Will schedule + * the key for an RCU free. 
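+ * The key is removed from both the net and socket key lists under
+ * keys_lock, and any partially-reassembled skb is freed.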
+ */ +static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, + unsigned long flags) + __releases(&key->reasm_lock) +{ + struct sk_buff *skb; + + skb = key->reasm_head; + key->reasm_head = NULL; + key->reasm_dead = true; + spin_unlock_irqrestore(&key->reasm_lock, flags); + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_del_rcu(&key->hlist); + hlist_del_rcu(&key->sklist); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + kfree_rcu(key, rcu); + + if (skb) + kfree_skb(skb); +} + +static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) +{ + struct mctp_hdr *hdr = mctp_hdr(skb); + u8 exp_seq, this_seq; + + this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) + & MCTP_HDR_SEQ_MASK; + + if (!key->reasm_head) { + key->reasm_head = skb; + key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + key->last_seq = this_seq; + return 0; + } + + exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; + + if (this_seq != exp_seq) + return -EINVAL; + + if (key->reasm_head->len + skb->len > mctp_message_maxlen) + return -EINVAL; + + skb->next = NULL; + skb->sk = NULL; + *key->reasm_tailp = skb; + key->reasm_tailp = &skb->next; + + key->last_seq = this_seq; + + key->reasm_head->data_len += skb->len; + key->reasm_head->len += skb->len; + key->reasm_head->truesize += skb->truesize; + + return 0; +} + +static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + struct mctp_sk_key *key; + struct mctp_sock *msk; + struct mctp_hdr *mh; + unsigned long f; + u8 tag, flags; + int rc; + + msk = NULL; + rc = -EINVAL; + + /* we may be receiving a locally-routed packet; drop source sk + * accounting + */ + skb_orphan(skb); + + /* ensure we have enough data for a header and a type */ + if (skb->len < sizeof(struct mctp_hdr) + 1) + goto out; + + /* grab header, advance data ptr */ + mh = mctp_hdr(skb); + skb_pull(skb, sizeof(struct mctp_hdr)); + + if (mh->ver != 1) + goto out; + + flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); + tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + + rcu_read_lock(); + + /* lookup socket / reasm context, exactly matching (src,dest,tag) */ + key = mctp_lookup_key(net, skb, mh->src); + + if (flags & MCTP_HDR_FLAG_SOM) { + if (key) { + msk = container_of(key->sk, struct mctp_sock, sk); + } else { + /* first response to a broadcast? do a more general + * key lookup to find the socket, but don't use this + * key for reassembly - we'll create a more specific + * one for future packets if required (ie, !EOM). + */ + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); + if (key) { + msk = container_of(key->sk, + struct mctp_sock, sk); + key = NULL; + } + } + + if (!key && !msk && (tag & MCTP_HDR_FLAG_TO)) + msk = mctp_lookup_bind(net, skb); + + if (!msk) { + rc = -ENOENT; + goto out_unlock; + } + + /* single-packet message? deliver to socket, clean up any + * pending key. 
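+		 * A matching key here means the (src, dest, tag) tuple is
+		 * being reused while an earlier reassembly is still pending;
+		 * that one can never complete, so it is dropped.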
+ */ + if (flags & MCTP_HDR_FLAG_EOM) { + sock_queue_rcv_skb(&msk->sk, skb); + if (key) { + spin_lock_irqsave(&key->reasm_lock, f); + /* we've hit a pending reassembly; not much we + * can do but drop it + */ + __mctp_key_unlock_drop(key, net, f); + } + rc = 0; + goto out_unlock; + } + + /* broadcast response or a bind() - create a key for further + * packets for this message + */ + if (!key) { + key = mctp_key_alloc(msk, mh->dest, mh->src, + tag, GFP_ATOMIC); + if (!key) { + rc = -ENOMEM; + goto out_unlock; + } + + /* we can queue without the reasm lock here, as the + * key isn't observable yet + */ + mctp_frag_queue(key, skb); + + /* if the key_add fails, we've raced with another + * SOM packet with the same src, dest and tag. There's + * no way to distinguish future packets, so all we + * can do is drop; we'll free the skb on exit from + * this function. + */ + rc = mctp_key_add(key, msk); + if (rc) + kfree(key); + + } else { + /* existing key: start reassembly */ + spin_lock_irqsave(&key->reasm_lock, f); + + if (key->reasm_head || key->reasm_dead) { + /* duplicate start? drop everything */ + __mctp_key_unlock_drop(key, net, f); + rc = -EEXIST; + } else { + rc = mctp_frag_queue(key, skb); + spin_unlock_irqrestore(&key->reasm_lock, f); + } + } + + } else if (key) { + /* this packet continues a previous message; reassemble + * using the message-specific key + */ + + spin_lock_irqsave(&key->reasm_lock, f); + + /* we need to be continuing an existing reassembly... */ + if (!key->reasm_head) + rc = -EINVAL; + else + rc = mctp_frag_queue(key, skb); + + /* end of message? deliver to socket, and we're done with + * the reassembly/response key + */ + if (!rc && flags & MCTP_HDR_FLAG_EOM) { + sock_queue_rcv_skb(key->sk, key->reasm_head); + key->reasm_head = NULL; + __mctp_key_unlock_drop(key, net, f); + } else { + spin_unlock_irqrestore(&key->reasm_lock, f); + } + + } else { + /* not a start, no matching key */ + rc = -ENOENT; + } + +out_unlock: + rcu_read_unlock(); +out: + if (rc) + kfree_skb(skb); + return rc; +} + +static unsigned int mctp_route_mtu(struct mctp_route *rt) +{ + return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); +} + +static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) +{ + struct mctp_hdr *hdr = mctp_hdr(skb); + char daddr_buf[MAX_ADDR_LEN]; + char *daddr = NULL; + unsigned int mtu; + int rc; + + skb->protocol = htons(ETH_P_MCTP); + + mtu = READ_ONCE(skb->dev->mtu); + if (skb->len > mtu) { + kfree_skb(skb); + return -EMSGSIZE; + } + + /* If lookup fails let the device handle daddr==NULL */ + if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) + daddr = daddr_buf; + + rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), + daddr, skb->dev->dev_addr, skb->len); + if (rc) { + kfree_skb(skb); + return -EHOSTUNREACH; + } + + rc = dev_queue_xmit(skb); + if (rc) + rc = net_xmit_errno(rc); + + return rc; +} + +/* route alloc/release */ +static void mctp_route_release(struct mctp_route *rt) +{ + if (refcount_dec_and_test(&rt->refs)) { + dev_put(rt->dev->dev); + kfree_rcu(rt, rcu); + } +} + +/* returns a route with the refcount at 1 */ +static struct mctp_route *mctp_route_alloc(void) +{ + struct mctp_route *rt; + + rt = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!rt) + return NULL; + + INIT_LIST_HEAD(&rt->list); + refcount_set(&rt->refs, 1); + rt->output = mctp_route_discard; + + return rt; +} + +unsigned int mctp_default_net(struct net *net) +{ + return READ_ONCE(net->mctp.default_net); +} + +int mctp_default_net_set(struct net *net, unsigned int index) 
+{
+	if (index == 0)
+		return -EINVAL;
+	WRITE_ONCE(net->mctp.default_net, index);
+	return 0;
+}
+
+/* tag management */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+			     struct mctp_sock *msk)
+{
+	struct netns_mctp *mns = &net->mctp;
+
+	lockdep_assert_held(&mns->keys_lock);
+
+	/* we hold the net's keys_lock here, allowing updates to both
+	 * the net and sk lists
+	 */
+	hlist_add_head_rcu(&key->hlist, &mns->keys);
+	hlist_add_head_rcu(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+				mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+	struct net *net = sock_net(&msk->sk);
+	struct netns_mctp *mns = &net->mctp;
+	struct mctp_sk_key *key, *tmp;
+	unsigned long flags;
+	int rc = -EAGAIN;
+	u8 tagbits;
+
+	/* be optimistic, alloc now */
+	key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+	if (!key)
+		return -ENOMEM;
+
+	/* 8 possible tag values */
+	tagbits = 0xff;
+
+	spin_lock_irqsave(&mns->keys_lock, flags);
+
+	/* Walk through the existing keys, looking for potential conflicting
+	 * tags. If we find a conflict, clear that bit from tagbits
+	 */
+	hlist_for_each_entry(tmp, &mns->keys, hlist) {
+		/* if we don't own the tag, it can't conflict */
+		if (tmp->tag & MCTP_HDR_FLAG_TO)
+			continue;
+
+		if ((tmp->peer_addr == daddr ||
+		     tmp->peer_addr == MCTP_ADDR_ANY) &&
+		    tmp->local_addr == saddr)
+			tagbits &= ~(1 << tmp->tag);
+
+		if (!tagbits)
+			break;
+	}
+
+	if (tagbits) {
+		key->tag = __ffs(tagbits);
+		mctp_reserve_tag(net, key, msk);
+		*tagp = key->tag;
+		rc = 0;
+	}
+
+	spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+	if (!tagbits)
+		kfree(key);
+
+	return rc;
+}
+
+/* routing lookups */
+static bool mctp_rt_match_eid(struct mctp_route *rt,
+			      unsigned int net, mctp_eid_t eid)
+{
+	return READ_ONCE(rt->dev->net) == net &&
+		rt->min <= eid && rt->max >= eid;
+}
+
+/* compares match, used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+				  struct mctp_route *rt2)
+{
+	ASSERT_RTNL();
+	return rt1->dev->net == rt2->dev->net &&
+		rt1->min == rt2->min &&
+		rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+				     mctp_eid_t daddr)
+{
+	struct mctp_route *tmp, *rt = NULL;
+
+	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+		/* TODO: add metrics */
+		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+			if (refcount_inc_not_zero(&tmp->refs)) {
+				rt = tmp;
+				break;
+			}
+		}
+	}
+
+	return rt;
+}
+
+/* sends a skb to rt and releases the route.
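+ * The caller's reference on rt is consumed; rt must not be used
+ * after this returns.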
*/ +int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) +{ + int rc; + + rc = rt->output(rt, skb); + mctp_route_release(rt); + return rc; +} + +static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, + unsigned int mtu, u8 tag) +{ + const unsigned int hlen = sizeof(struct mctp_hdr); + struct mctp_hdr *hdr, *hdr2; + unsigned int pos, size; + struct sk_buff *skb2; + int rc; + u8 seq; + + hdr = mctp_hdr(skb); + seq = 0; + rc = 0; + + if (mtu < hlen + 1) { + kfree_skb(skb); + return -EMSGSIZE; + } + + /* we've got the header */ + skb_pull(skb, hlen); + + for (pos = 0; pos < skb->len;) { + /* size of message payload */ + size = min(mtu - hlen, skb->len - pos); + + skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL); + if (!skb2) { + rc = -ENOMEM; + break; + } + + /* generic skb copy */ + skb2->protocol = skb->protocol; + skb2->priority = skb->priority; + skb2->dev = skb->dev; + memcpy(skb2->cb, skb->cb, sizeof(skb2->cb)); + + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + + /* establish packet */ + skb_reserve(skb2, MCTP_HEADER_MAXLEN); + skb_reset_network_header(skb2); + skb_put(skb2, hlen + size); + skb2->transport_header = skb2->network_header + hlen; + + /* copy header fields, calculate SOM/EOM flags & seq */ + hdr2 = mctp_hdr(skb2); + hdr2->ver = hdr->ver; + hdr2->dest = hdr->dest; + hdr2->src = hdr->src; + hdr2->flags_seq_tag = tag & + (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + + if (pos == 0) + hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM; + + if (pos + size == skb->len) + hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM; + + hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT; + + /* copy message payload */ + skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + + /* do route, but don't drop the rt reference */ + rc = rt->output(rt, skb2); + if (rc) + break; + + seq = (seq + 1) & MCTP_HDR_SEQ_MASK; + pos += size; + } + + mctp_route_release(rt); + consume_skb(skb); + return rc; +} + +int mctp_local_output(struct sock *sk, struct mctp_route *rt, + struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) +{ + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + struct mctp_skb_cb *cb = mctp_cb(skb); + struct mctp_hdr *hdr; + unsigned long flags; + unsigned int mtu; + mctp_eid_t saddr; + int rc; + u8 tag; + + if (WARN_ON(!rt->dev)) + return -EINVAL; + + spin_lock_irqsave(&rt->dev->addrs_lock, flags); + if (rt->dev->num_addrs == 0) { + rc = -EHOSTUNREACH; + } else { + /* use the outbound interface's first address as our source */ + saddr = rt->dev->addrs[0]; + rc = 0; + } + spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); + + if (rc) + return rc; + + if (req_tag & MCTP_HDR_FLAG_TO) { + rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag); + if (rc) + return rc; + tag |= MCTP_HDR_FLAG_TO; + } else { + tag = req_tag; + } + + + skb->protocol = htons(ETH_P_MCTP); + skb->priority = 0; + skb_reset_transport_header(skb); + skb_push(skb, sizeof(struct mctp_hdr)); + skb_reset_network_header(skb); + skb->dev = rt->dev->dev; + + /* cb->net will have been set on initial ingress */ + cb->src = saddr; + + /* set up common header fields */ + hdr = mctp_hdr(skb); + hdr->ver = 1; + hdr->dest = daddr; + hdr->src = saddr; + + mtu = mctp_route_mtu(rt); + + if (skb->len + sizeof(struct mctp_hdr) <= mtu) { + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | + tag; + return mctp_do_route(rt, skb); + } else { + return mctp_do_fragment_route(rt, skb, mtu, tag); + } +} + +/* route management */ +static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t 
daddr_start, + unsigned int daddr_extent, unsigned int mtu, + bool is_local) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *ert; + + if (!mctp_address_ok(daddr_start)) + return -EINVAL; + + if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + return -EINVAL; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rt->min = daddr_start; + rt->max = daddr_start + daddr_extent; + rt->mtu = mtu; + rt->dev = mdev; + dev_hold(rt->dev->dev); + rt->output = is_local ? mctp_route_input : mctp_route_output; + + ASSERT_RTNL(); + /* Prevent duplicate identical routes. */ + list_for_each_entry(ert, &net->mctp.routes, list) { + if (mctp_rt_compare_exact(rt, ert)) { + mctp_route_release(rt); + return -EEXIST; + } + } + + list_add_rcu(&rt->list, &net->mctp.routes); + + return 0; +} + +static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, + unsigned int daddr_extent) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *tmp; + mctp_eid_t daddr_end; + bool dropped; + + if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + return -EINVAL; + + daddr_end = daddr_start + daddr_extent; + dropped = false; + + ASSERT_RTNL(); + + list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { + if (rt->dev == mdev && + rt->min == daddr_start && rt->max == daddr_end) { + list_del_rcu(&rt->list); + /* TODO: immediate RTM_DELROUTE */ + mctp_route_release(rt); + dropped = true; + } + } + + return dropped ? 0 : -ENOENT; +} + +int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + return mctp_route_add(mdev, addr, 0, 0, true); +} + +int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + return mctp_route_remove(mdev, addr, 0); +} + +/* removes all entries for a given device */ +void mctp_route_remove_dev(struct mctp_dev *mdev) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *tmp; + + ASSERT_RTNL(); + list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { + if (rt->dev == mdev) { + list_del_rcu(&rt->list); + /* TODO: immediate RTM_DELROUTE */ + mctp_route_release(rt); + } + } +} + +/* Incoming packet-handling */ + +static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct net *net = dev_net(dev); + struct mctp_skb_cb *cb; + struct mctp_route *rt; + struct mctp_hdr *mh; + + /* basic non-data sanity checks */ + if (dev->type != ARPHRD_MCTP) + goto err_drop; + + if (!pskb_may_pull(skb, sizeof(struct mctp_hdr))) + goto err_drop; + + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + + /* We have enough for a header; decode and route */ + mh = mctp_hdr(skb); + if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) + goto err_drop; + + cb = __mctp_cb(skb); + rcu_read_lock(); + cb->net = READ_ONCE(__mctp_dev_get(dev)->net); + rcu_read_unlock(); + + rt = mctp_route_lookup(net, cb->net, mh->dest); + if (!rt) + goto err_drop; + + mctp_do_route(rt, skb); + + return NET_RX_SUCCESS; + +err_drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type mctp_packet_type = { + .type = cpu_to_be16(ETH_P_MCTP), + .func = mctp_pkttype_receive, +}; + +/* netlink interface */ + +static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U8 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. + * tb must hold RTA_MAX+1 elements. 
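+ * On success, *rtm, *mdev and *daddr_start are filled in from the
+ * message; routes via loopback interfaces are rejected.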
+ */ +static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr **tb, struct rtmsg **rtm, + struct mctp_dev **mdev, mctp_eid_t *daddr_start) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + unsigned int ifindex; + int rc; + + rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, + rta_mctp_policy, extack); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "incorrect format"); + return rc; + } + + if (!tb[RTA_DST]) { + NL_SET_ERR_MSG(extack, "dst EID missing"); + return -EINVAL; + } + *daddr_start = nla_get_u8(tb[RTA_DST]); + + if (!tb[RTA_OIF]) { + NL_SET_ERR_MSG(extack, "ifindex missing"); + return -EINVAL; + } + ifindex = nla_get_u32(tb[RTA_OIF]); + + *rtm = nlmsg_data(nlh); + if ((*rtm)->rtm_family != AF_MCTP) { + NL_SET_ERR_MSG(extack, "route family must be AF_MCTP"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "bad ifindex"); + return -ENODEV; + } + *mdev = mctp_dev_get_rtnl(dev); + if (!*mdev) + return -ENODEV; + + if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "no routes to loopback"); + return -EINVAL; + } + + return 0; +} + +static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RTA_MAX + 1]; + mctp_eid_t daddr_start; + struct mctp_dev *mdev; + struct rtmsg *rtm; + unsigned int mtu; + int rc; + + rc = mctp_route_nlparse(skb, nlh, extack, tb, + &rtm, &mdev, &daddr_start); + if (rc < 0) + return rc; + + if (rtm->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); + return -EINVAL; + } + + /* TODO: parse mtu from nlparse */ + mtu = 0; + + rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false); + return rc; +} + +static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RTA_MAX + 1]; + mctp_eid_t daddr_start; + struct mctp_dev *mdev; + struct rtmsg *rtm; + int rc; + + rc = mctp_route_nlparse(skb, nlh, extack, tb, + &rtm, &mdev, &daddr_start); + if (rc < 0) + return rc; + + /* we only have unicast routes */ + if (rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + + rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len); + return rc; +} + +static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, + u32 portid, u32 seq, int event, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct rtmsg *hdr; + void *metrics; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); + if (!nlh) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->rtm_family = AF_MCTP; + + /* we use the _len fields as a number of EIDs, rather than + * a number of bits in the address + */ + hdr->rtm_dst_len = rt->max - rt->min; + hdr->rtm_src_len = 0; + hdr->rtm_tos = 0; + hdr->rtm_table = RT_TABLE_DEFAULT; + hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ + hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ + hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */ + + if (nla_put_u8(skb, RTA_DST, rt->min)) + goto cancel; + + metrics = nla_nest_start_noflag(skb, RTA_METRICS); + if (!metrics) + goto cancel; + + if (rt->mtu) { + if (nla_put_u32(skb, RTAX_MTU, rt->mtu)) + goto cancel; + } + + nla_nest_end(skb, metrics); + + if (rt->dev) { + if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) + goto cancel; + } + + /* TODO: conditional neighbour physaddr? 
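+	 * (mctp_neigh_lookup() could supply one here for single-EID
+	 * routes.)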
*/ + + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mctp_route *rt; + int s_idx, idx; + + /* TODO: allow filtering on route data, possibly under + * cb->strict_check + */ + + /* TODO: change to struct overlay */ + s_idx = cb->args[0]; + idx = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (idx++ < s_idx) + continue; + if (mctp_fill_rtinfo(skb, rt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWROUTE, NLM_F_MULTI) < 0) + break; + } + + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +/* net namespace implementation */ +static int __net_init mctp_routes_net_init(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + + INIT_LIST_HEAD(&ns->routes); + INIT_HLIST_HEAD(&ns->binds); + mutex_init(&ns->bind_lock); + INIT_HLIST_HEAD(&ns->keys); + spin_lock_init(&ns->keys_lock); + WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET)); + return 0; +} + +static void __net_exit mctp_routes_net_exit(struct net *net) +{ + struct mctp_route *rt; + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) + mctp_route_release(rt); +} + +static struct pernet_operations mctp_net_ops = { + .init = mctp_routes_net_init, + .exit = mctp_routes_net_exit, +}; + +int __init mctp_routes_init(void) +{ + dev_add_pack(&mctp_packet_type); + + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, + NULL, mctp_dump_rtinfo, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, + mctp_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, + mctp_delroute, NULL, 0); + + return register_pernet_subsys(&mctp_net_ops); +} + +void __exit mctp_routes_exit(void) +{ + unregister_pernet_subsys(&mctp_net_ops); + rtnl_unregister(PF_MCTP, RTM_DELROUTE); + rtnl_unregister(PF_MCTP, RTM_NEWROUTE); + rtnl_unregister(PF_MCTP, RTM_GETROUTE); + dev_remove_pack(&mctp_packet_type); +} diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 05a21dd072df..ffeb2df8be7a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -407,7 +407,6 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Verify ttl is valid */ if (dec.ttl <= 1) goto err; - dec.ttl -= 1; /* Find the output device */ out_dev = rcu_dereference(nh->nh_dev); @@ -431,6 +430,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, skb->dev = out_dev; skb->protocol = htons(ETH_P_MPLS_UC); + dec.ttl -= 1; if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 551976e4284c..8ecad71b3613 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); break; case NFPROTO_IPV6: - flow_tuple->mtu = ip6_dst_mtu_forward(dst); + flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); break; } diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index f92006cec94c..2bfd9f1b8f11 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net, flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, &val, 
&mask); - if (dev) - dev_put(dev); + dev_put(dev); return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bbd1209694b8..7f2f69b609d8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ - if (state->in) - dev_put(state->in); - if (state->out) - dev_put(state->out); + dev_put(state->in); + dev_put(state->out); if (state->sk) sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_put(entry->physin); - if (entry->physout) - dev_put(entry->physout); + dev_put(entry->physin); + dev_put(entry->physout); #endif } @@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; - if (state->in) - dev_hold(state->in); - if (state->out) - dev_hold(state->out); + dev_hold(state->in); + dev_hold(state->out); if (state->sk) sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_hold(entry->physin); - if (entry->physout) - dev_hold(entry->physout); + dev_hold(entry->physin); + dev_hold(entry->physout); #endif } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index baf235721c43..000bb3da4f77 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -187,14 +187,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, } doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->lvl.local == NULL) { ret_val = -ENOMEM; goto add_std_failure; } doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, sizeof(u32), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->lvl.cipso == NULL) { ret_val = -ENOMEM; goto add_std_failure; @@ -263,7 +263,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, doi_def->map.std->cat.local = kcalloc( doi_def->map.std->cat.local_size, sizeof(u32), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->cat.local == NULL) { ret_val = -ENOMEM; goto add_std_failure; @@ -271,7 +271,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, doi_def->map.std->cat.cipso = kcalloc( doi_def->map.std->cat.cipso_size, sizeof(u32), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->cat.cipso == NULL) { ret_val = -ENOMEM; goto add_std_failure; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 2483df0bbd7c..566ba4397ee4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { @@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? 
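
For reference: the many dev_put()/dev_hold() hunks in this series all rely on those helpers having become no-ops for a NULL argument, following the same convention as kfree(NULL). That is what makes the explicit guards removable:

    /* before: every caller had to guard against NULL */
    if (dev)
            dev_put(dev);

    /* after: the helper itself tolerates NULL */
    dev_put(dev);
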
dev->name : NULL), addr, mask); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 380f95aacdec..24b7cf447bc5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); + if (err == -ESRCH) + err = 0; } if (report) { int err2; err2 = nlmsg_unicast(sk, skb, portid); - if (!err || err == -ESRCH) + if (!err) err = err2; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2d6fdf40df66..1afca2a6c2ac 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -40,14 +40,6 @@ void genl_unlock(void) } EXPORT_SYMBOL(genl_unlock); -#ifdef CONFIG_LOCKDEP -bool lockdep_genl_is_held(void) -{ - return lockdep_is_held(&genl_mutex); -} -EXPORT_SYMBOL(lockdep_genl_is_held); -#endif - static void genl_lock_all(void) { down_write(&cb_lock); @@ -1485,6 +1477,7 @@ int genlmsg_multicast_allns(const struct genl_family *family, { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; + group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); } @@ -1495,14 +1488,12 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb, { struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; - int report = 0; - - if (info->nlhdr) - report = nlmsg_report(info->nlhdr); if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; + group = family->mcgrp_offset + group; - nlmsg_notify(sk, skb, info->snd_portid, group, report, flags); + nlmsg_notify(sk, skb, info->snd_portid, group, + nlmsg_report(info->nlhdr), flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index a880dd33e901..511819fbfa67 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused) if (dev == NULL || nr_rx_frame(skb, dev) == 0) kfree_skb(skb); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running()) mod_timer(&loopback_timer, jiffies + 10); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index de0456073dc0..ddd5cbd455e3 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - if (first) - dev_hold(first); + dev_hold(first); rcu_read_unlock(); return first; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 4a9e72073564..6024fad905ff 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -79,7 +79,7 @@ int __init af_nfc_init(void) return sock_register(&nfc_sock_family_ops); } -void af_nfc_exit(void) +void __exit af_nfc_exit(void) { sock_unregister(PF_NFC); } diff --git a/net/nfc/core.c b/net/nfc/core.c index 573c80c6ff7a..3c645c1d99c9 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -636,7 +636,7 @@ error: return rc; } -int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -665,7 +665,7 @@ int nfc_tm_data_received(struct 
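
For context: the genetlink hunk above makes genlmsg_multicast_allns() and genl_notify() translate the family-relative multicast group index into the global netlink group id before sending, since nlmsg_multicast() operates on global ids. The convention, as a hedged sketch of a caller inside a genl handler:

    if (group >= family->n_mcgrps)          /* index is family-relative */
            return -EINVAL;
    group = family->mcgrp_offset + group;   /* now a global group id */
    return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
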
nfc_dev *dev, struct sk_buff *skb) EXPORT_SYMBOL(nfc_tm_data_received); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len) + const u8 *gb, size_t gb_len) { int rc; @@ -824,7 +824,7 @@ EXPORT_SYMBOL(nfc_targets_found); */ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx) { - struct nfc_target *tg; + const struct nfc_target *tg; int i; pr_debug("dev_name %s n_target %d\n", dev_name(&dev->dev), target_idx); @@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) * @tx_headroom: reserved space at beginning of skb * @tx_tailroom: reserved space at end of skb */ -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom) { diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 5044c7db577e..fefc03674f4f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -732,7 +732,7 @@ exit: return rc; } -static struct nfc_ops digital_nfc_ops = { +static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, @@ -745,7 +745,7 @@ static struct nfc_ops digital_nfc_ops = { .im_transceive = digital_in_send, }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 3481941be70b..ceb87db57cdb 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -128,7 +128,7 @@ static void nfc_hci_msg_rx_work(struct work_struct *work) struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, msg_rx_work); struct sk_buff *skb; - struct hcp_message *message; + const struct hcp_message *message; u8 pipe; u8 type; u8 instruction; @@ -182,9 +182,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { u8 status = NFC_HCI_ANY_OK; - struct hci_create_pipe_resp *create_info; - struct hci_delete_pipe_noti *delete_info; - struct hci_all_pipe_cleared_noti *cleared_info; + const struct hci_create_pipe_resp *create_info; + const struct hci_delete_pipe_noti *delete_info; + const struct hci_all_pipe_cleared_noti *cleared_info; u8 gate; pr_debug("from pipe %x cmd %x\n", pipe, cmd); @@ -447,7 +447,7 @@ static void nfc_hci_cmd_timeout(struct timer_list *t) } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, - struct nfc_hci_gate *gates) + const struct nfc_hci_gate *gates) { int r; while (gate_count--) { @@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return hdev->ops->fw_download(hdev, firmware_name); } -static struct nfc_ops hci_nfc_ops = { +static const struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, .start_poll = hci_start_poll, @@ -947,7 +947,7 @@ static struct nfc_ops hci_nfc_ops = { .se_io = hci_se_io, }; -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 6ab40ea17662..2140f6724644 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -11,7 +11,7 @@ static LIST_HEAD(llc_engines); -int nfc_llc_init(void) +int __init 
nfc_llc_init(void) { int r; @@ -41,7 +41,7 @@ void nfc_llc_exit(void) } } -int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) +int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops) { struct nfc_llc_engine *llc_engine; diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index 823ddb621e5d..d66271d211a5 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -26,20 +26,20 @@ struct nfc_llc_ops { struct nfc_llc_engine { const char *name; - struct nfc_llc_ops *ops; + const struct nfc_llc_ops *ops; struct list_head entry; }; struct nfc_llc { void *data; - struct nfc_llc_ops *ops; + const struct nfc_llc_ops *ops; int rx_headroom; int rx_tailroom; }; void *nfc_llc_get_data(struct nfc_llc *llc); -int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); +int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index a42852f36f2e..a58716f16954 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -71,7 +71,7 @@ static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) return llc_nop->xmit_to_drv(llc_nop->hdev, skb); } -static struct nfc_llc_ops llc_nop_ops = { +static const struct nfc_llc_ops llc_nop_ops = { .init = llc_nop_init, .deinit = llc_nop_deinit, .start = llc_nop_start, diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 1e3a90049da9..aef750d7787c 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) return ((y >= x) || (y < z)) ? true : false; } -static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, +static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc, int payload_len) { struct sk_buff *skb; @@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, } /* immediately sends an S frame. */ -static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc, enum sframe_type sframe_type, int nr) { int r; @@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, } /* immediately sends an U frame. 
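
For reference: the recurring `struct nfc_llc_ops` to `const struct nfc_llc_ops` changes (and the matching nfc_ops/nci_ops ones elsewhere in this series) let these method tables live in rodata; the core only ever stores a const pointer to them. An illustrative driver-side registration, not taken from the patch:

    static const struct nfc_llc_ops demo_llc_ops = {
            .init   = demo_llc_init,
            .deinit = demo_llc_deinit,
            /* remaining hooks as needed; the table is read-only */
    };

    r = nfc_llc_register("demo", &demo_llc_ops);
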
skb may contain optional payload */ -static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier uframe_modifier) { @@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) wake_up(shdlc->connect_wq); } -static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; @@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } -static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; @@ -820,7 +820,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) return 0; } -static struct nfc_llc_ops llc_shdlc_ops = { +static const struct nfc_llc_ops llc_shdlc_ops = { .init = llc_shdlc_init, .deinit = llc_shdlc_deinit, .start = llc_shdlc_start, diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 97853c9cefc7..d49d4bf2e37c 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock); /* TLV API */ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length); struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 475061c79c44..3c4172a5aeb5 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -15,7 +15,7 @@ #include "nfc.h" #include "llcp.h" -static u8 llcp_tlv_length[LLCP_TLV_MAX] = { +static const u8 llcp_tlv_length[LLCP_TLV_MAX] = { 0, 1, /* VERSION */ 2, /* MIUX */ @@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = { }; -static u8 llcp_tlv8(u8 *tlv, u8 type) +static u8 llcp_tlv8(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type) return tlv[2]; } -static u16 llcp_tlv16(u8 *tlv, u8 type) +static u16 llcp_tlv16(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type) } -static u8 llcp_tlv_version(u8 *tlv) +static u8 llcp_tlv_version(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_VERSION); } -static u16 llcp_tlv_miux(u8 *tlv) +static u16 llcp_tlv_miux(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff; } -static u16 llcp_tlv_wks(u8 *tlv) +static u16 llcp_tlv_wks(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_WKS); } -static u16 llcp_tlv_lto(u8 *tlv) +static u16 llcp_tlv_lto(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_LTO); } -static u8 
llcp_tlv_opt(u8 *tlv) +static u8 llcp_tlv_opt(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_OPT); } -static u8 llcp_tlv_rw(u8 *tlv) +static u8 llcp_tlv_rw(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; } -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length) { u8 *tlv, length; @@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) return sdres; } -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len) { struct nfc_llcp_sdp_tlv *sdreq; @@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) } int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, } int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu, return pdu; } -static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, +static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv, u8 tlv_length) { /* XXX Add an skb length check */ @@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *service_name_tlv = NULL, service_name_tlv_length; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *service_name_tlv = NULL; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; @@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index cc997518f79d..eaeb2b1cfa6a 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -301,7 +301,7 @@ static char *wks[] = { "urn:nfc:sn:snep", }; -static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) +static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) { int sap, num_wks; @@ -325,7 +325,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -522,7 +522,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, version, version_length; u8 lto_length, wks_length, miux_length; - u8 *version_tlv = NULL, *lto_tlv = NULL, + const u8 *version_tlv = NULL, *lto_tlv = NULL, *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; @@ 
-612,7 +612,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) return local->gb; } -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; @@ -639,27 +639,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) local->remote_gb_len - 3); } -static u8 nfc_llcp_dsap(struct sk_buff *pdu) +static u8 nfc_llcp_dsap(const struct sk_buff *pdu) { return (pdu->data[0] & 0xfc) >> 2; } -static u8 nfc_llcp_ptype(struct sk_buff *pdu) +static u8 nfc_llcp_ptype(const struct sk_buff *pdu) { return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); } -static u8 nfc_llcp_ssap(struct sk_buff *pdu) +static u8 nfc_llcp_ssap(const struct sk_buff *pdu) { return pdu->data[1] & 0x3f; } -static u8 nfc_llcp_ns(struct sk_buff *pdu) +static u8 nfc_llcp_ns(const struct sk_buff *pdu) { return pdu->data[2] >> 4; } -static u8 nfc_llcp_nr(struct sk_buff *pdu) +static u8 nfc_llcp_nr(const struct sk_buff *pdu) { return pdu->data[2] & 0xf; } @@ -801,7 +801,7 @@ out: } static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct nfc_llcp_sock *llcp_sock; @@ -815,9 +815,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, return llcp_sock; } -static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len) +static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) { - u8 *tlv = &skb->data[2], type, length; + u8 type, length; + const u8 *tlv = &skb->data[2]; size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; while (offset < tlv_array_len) { @@ -875,7 +876,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, } static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct sock *new_sk, *parent; struct nfc_llcp_sock *sock, *new_sock; @@ -893,7 +894,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, goto fail; } } else { - u8 *sn; + const u8 *sn; size_t sn_len; sn = nfc_llcp_connect_sn(skb, &sn_len); @@ -1112,7 +1113,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, } static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1155,7 +1156,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1188,7 +1190,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1226,12 +1229,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) } static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; - u8 dsap, ssap, *tlv, type, length, tid, sap; + u8 dsap, ssap, type, length, tid, sap; + const u8 *tlv; u16 tlv_len, offset; - char *service_name; + 
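
For context: the LLCP parsers being const-qualified here all walk the same type/length/value layout, where each record occupies 2 + length bytes. A self-contained sketch of the loop shape (illustrative function name):

    static void demo_tlv_walk(const u8 *tlv_array, u16 tlv_array_len)
    {
            const u8 *tlv = tlv_array;
            u16 offset = 0;

            /* require a full type+length header before reading it */
            while (offset + 2 <= tlv_array_len) {
                    u8 type = tlv[0];
                    u8 length = tlv[1];

                    pr_debug("tlv type %u len %u\n", type, length);
                    /* dispatch on type: VERSION, MIUX, WKS, LTO, ... */

                    tlv += 2 + length;
                    offset += 2 + length;
            }
    }
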
const char *service_name; size_t service_name_len; struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index da7fe9db1b00..82ab39d80726 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -53,9 +53,9 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, } int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, - struct dest_spec_params *params) + const struct dest_spec_params *params) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->dest_type == dest_type) { @@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err) /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, unsigned long opt), - unsigned long opt, __u32 timeout) + void (*req)(struct nci_dev *ndev, const void *opt), + const void *opt, __u32 timeout) { int rc = 0; long completion_rc; @@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev, inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) + const void *opt), + const void *opt, __u32 timeout) { int rc; @@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev, return rc; } -static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) +static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; @@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } -static void nci_init_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); - nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt); + nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } -static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; @@ -210,14 +210,14 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) } struct nci_set_config_param { - __u8 id; - size_t len; - __u8 *val; + __u8 id; + size_t len; + const __u8 *val; }; -static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) +static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { - struct nci_set_config_param *param = (struct nci_set_config_param *)opt; + const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); @@ -235,10 +235,9 @@ struct nci_rf_discover_param { __u32 tm_protocols; }; -static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { - struct nci_rf_discover_param *param = - (struct nci_rf_discover_param *)opt; + const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; @@ -301,10 +300,9 @@ struct nci_rf_discover_select_param { __u8 rf_protocol; }; -static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { - struct nci_rf_discover_select_param *param = - (struct 
nci_rf_discover_select_param *)opt; + const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; @@ -328,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_discover_select_cmd), &cmd); } -static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; - cmd.type = opt; + cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); @@ -341,18 +339,17 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) struct nci_cmd_param { __u16 opcode; size_t len; - __u8 *payload; + const __u8 *payload; }; -static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) +static void nci_generic_req(struct nci_dev *ndev, const void *opt) { - struct nci_cmd_param *param = - (struct nci_cmd_param *)opt; + const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } -int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload) { struct nci_cmd_param param; @@ -360,12 +357,13 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); -int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload) +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, + const __u8 *payload) { struct nci_cmd_param param; @@ -373,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload) param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { - return __nci_request(ndev, nci_reset_req, 0, + return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { - return __nci_request(ndev, nci_init_req, 0, + return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); @@ -397,9 +395,9 @@ struct nci_loopback_data { struct sk_buff *data; }; -static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { - struct nci_loopback_data *data = (struct nci_loopback_data *)opt; + const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } @@ -407,7 +405,7 @@ static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) { @@ -420,7 +418,7 @@ static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) nci_req_complete(ndev, NCI_STATUS_OK); } -int nci_nfcc_loopback(struct 
nci_dev *ndev, void *data, size_t data_len, +int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp) { int r; @@ -460,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, loopback_data.data = skb; ndev->cur_conn_id = conn_id; - r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data, + r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; @@ -493,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->init(ndev); if (!rc) { - rc = __nci_request(ndev, nci_reset_req, 0, + rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } @@ -506,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev) .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; - unsigned long opt = 0; + const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) - opt = (unsigned long)&nci_init_v2_cmd; + opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); @@ -519,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->post_setup(ndev); if (!rc) { - rc = __nci_request(ndev, nci_init_complete_req, 0, + rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } @@ -569,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev) atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); - __nci_request(ndev, nci_reset_req, 0, + __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty @@ -624,7 +622,7 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } -int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) { struct nci_set_config_param param; @@ -635,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) param.len = len; param.val = val; - return __nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return __nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); -static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; - __u8 action = opt; + __u8 action = (unsigned long)opt; cmd.discovery_action = action; @@ -652,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { - return __nci_request(ndev, nci_nfcee_discover_req, action, + unsigned long opt = action; + + return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); -static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { - struct nci_nfcee_mode_set_cmd *cmd = - (struct nci_nfcee_mode_set_cmd *)opt; + const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); @@ -673,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; - return 
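
For reference: with the request callbacks changed from `unsigned long opt` to `const void *opt`, small scalars are now round-tripped through the pointer value itself and never dereferenced. The convention used throughout these NCI hunks, shown with an illustrative callback name:

    /* callee: unpack the scalar; opt is never dereferenced */
    static void demo_conn_close_req(struct nci_dev *ndev, const void *opt)
    {
            __u8 conn_id = (unsigned long)opt;

            nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
    }

    /* caller: widen to unsigned long first, then cast to a pointer */
    unsigned long opt = conn_id;

    __nci_request(ndev, demo_conn_close_req, (void *)opt,
                  msecs_to_jiffies(NCI_CMD_TIMEOUT));
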
__nci_request(ndev, nci_nfcee_mode_set_req, - (unsigned long)&cmd, + return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); -static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { - struct core_conn_create_data *data = - (struct core_conn_create_data *)opt; + const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } @@ -690,7 +687,7 @@ static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, - struct core_conn_create_dest_spec_params *params) + const struct core_conn_create_dest_spec_params *params) { int r; struct nci_core_conn_create_cmd *cmd; @@ -719,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, } ndev->cur_dest_type = destination_type; - r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, + r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); -static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { - __u8 conn_id = opt; + __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { + unsigned long opt = conn_id; + ndev->cur_conn_id = conn_id; - return __nci_request(ndev, nci_core_conn_close_req, conn_id, + return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); @@ -756,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) param.id = NCI_PN_ATR_REQ_GEN_BYTES; - rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; - return nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } @@ -813,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, - NCI_DEACTIVATE_TYPE_IDLE_MODE, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; @@ -835,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; - rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) @@ -854,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev) return; } - nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE, + nci_request(ndev, nci_rf_deactivate_req, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } @@ -863,7 +863,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_select_param param; - struct nfc_target *nci_target = NULL; + const struct nfc_target 
*nci_target = NULL; int i; int rc = 0; @@ -913,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; - rc = nci_request(ndev, nci_rf_discover_select_req, - (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_select_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } @@ -929,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; + unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; pr_debug("entry\n"); @@ -947,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { - nci_request(ndev, nci_rf_deactivate_req, nci_mode, + nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } @@ -985,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { - nci_request(ndev, nci_rf_deactivate_req, 0, - msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); + nci_request(ndev, nci_rf_deactivate_req, (void *)0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); @@ -1004,7 +1003,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; if (!conn_info) @@ -1102,7 +1101,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return ndev->ops->fw_download(ndev, firmware_name); } -static struct nfc_ops nci_nfc_ops = { +static const struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, @@ -1129,7 +1128,7 @@ static struct nfc_ops nci_nfc_ops = { * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { @@ -1152,8 +1151,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); - ops->prop_ops = NULL; - ops->n_prop_ops = 0; + goto free_nci; } ndev->tx_headroom = tx_headroom; @@ -1270,7 +1268,7 @@ EXPORT_SYMBOL(nci_register_device); */ void nci_unregister_device(struct nci_dev *ndev) { - struct nci_conn_info *conn_info, *n; + struct nci_conn_info *conn_info, *n; nci_close_device(ndev); @@ -1332,7 +1330,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; @@ -1364,12 +1362,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ -static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, - size_t n_ops, - __u16 opcode) +static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, + size_t n_ops, + __u16 opcode) { size_t i; - struct nci_driver_ops *op; + 
const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; @@ -1384,10 +1382,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || !op->rsp) @@ -1397,10 +1395,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) @@ -1442,7 +1440,7 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; struct sk_buff *skb; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index ce3382be937f..6055dc9a82aa 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -26,7 +26,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, __u8 conn_id, int err) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; data_exchange_cb_t cb; void *cb_context; @@ -80,7 +80,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) @@ -93,9 +93,9 @@ EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size); static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; int total_len = skb->len; - unsigned char *data = skb->data; + const unsigned char *data = skb->data; unsigned long flags; struct sk_buff_head frags_q; struct sk_buff *skb_frag; @@ -166,7 +166,7 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); @@ -269,7 +269,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) __u8 pbf = nci_pbf(skb->data); __u8 status = 0; __u8 conn_id = nci_conn_id(skb->data); - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index d6732e5e8958..e199912ee1e5 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -16,11 +16,11 @@ #include <linux/nfc.h> struct nci_data { - u8 conn_id; - u8 pipe; - u8 cmd; - const u8 *data; - u32 data_len; + u8 conn_id; + u8 pipe; + u8 cmd; + const u8 *data; + u32 data_len; } __packed; struct nci_hci_create_pipe_params { @@ -142,7 +142,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = 
pipe; @@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return i; } -static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { - struct nci_data *data = (struct nci_data *)opt; + const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); @@ -221,8 +221,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_hcp_message *message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, data.data = param; data.data_len = param_len; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -363,7 +363,7 @@ exit: static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct sk_buff *skb) { - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) @@ -406,7 +406,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; - struct nci_hcp_message *message; + const struct nci_hcp_message *message; u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { @@ -498,7 +498,7 @@ void nci_hci_data_received_cb(void *context, int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) @@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) data.data = NULL; data.data_len = 0; - return nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, - msecs_to_jiffies(NCI_DATA_TIMEOUT)); + return nci_request(ndev, nci_hci_send_data_req, &data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); @@ -523,7 +522,7 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; - struct nci_hci_create_pipe_resp *resp; + const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); @@ -557,8 +556,8 @@ static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { - struct nci_hcp_message *message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; @@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = tmp; data.data_len = param_len + 1; - r = nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -605,8 +603,8 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { - struct nci_hcp_message 
*message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = &idx; data.data_len = 1; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { @@ -697,7 +695,7 @@ EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, - struct nci_hci_gate *gates) + const struct nci_hci_gate *gates) { int r; @@ -714,7 +712,7 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev, int nci_hci_dev_session_init(struct nci_dev *ndev) { - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; struct sk_buff *skb; int r; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 98af04c86b2c..c5eacaac41ae 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -28,10 +28,10 @@ /* Handle NCI Notification packets */ static void nci_core_reset_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { /* Handle NCI 2.x core reset notification */ - struct nci_core_reset_ntf *ntf = (void *)skb->data; + const struct nci_core_reset_ntf *ntf = (void *)skb->data; ndev->nci_ver = ntf->nci_ver; pr_debug("nci_ver 0x%x, config_status 0x%x\n", @@ -48,7 +48,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct nci_core_conn_credit_ntf *ntf = (void *) skb->data; - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; int i; pr_debug("num_entries %d\n", ntf->num_entries); @@ -80,7 +80,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, } static void nci_core_generic_error_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -107,9 +107,10 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } -static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfca_poll *nfca_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfca_poll *nfca_poll, + const __u8 *data) { nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data)); data += 2; @@ -134,9 +135,10 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcb_poll *nfcb_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcb_poll *nfcb_poll, + const __u8 *data) { nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE); @@ -148,9 +150,10 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcf_poll *nfcf_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcf_poll *nfcf_poll, + const __u8 *data) { nfcf_poll->bit_rate = *data++; nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE); @@ 
-164,9 +167,10 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcv_poll *nfcv_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcv_poll *nfcv_poll, + const __u8 *data) { ++data; nfcv_poll->dsfid = *data++; @@ -175,9 +179,10 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcf_listen *nfcf_listen, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcf_listen *nfcf_listen, + const __u8 *data) { nfcf_listen->local_nfcid2_len = min_t(__u8, *data++, NFC_NFCID2_MAXSIZE); @@ -198,12 +203,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev, struct nfc_target *target, __u8 rf_protocol, __u8 rf_tech_and_mode, - void *params) + const void *params) { - struct rf_tech_specific_params_nfca_poll *nfca_poll; - struct rf_tech_specific_params_nfcb_poll *nfcb_poll; - struct rf_tech_specific_params_nfcf_poll *nfcf_poll; - struct rf_tech_specific_params_nfcv_poll *nfcv_poll; + const struct rf_tech_specific_params_nfca_poll *nfca_poll; + const struct rf_tech_specific_params_nfcb_poll *nfcb_poll; + const struct rf_tech_specific_params_nfcf_poll *nfcf_poll; + const struct rf_tech_specific_params_nfcv_poll *nfcv_poll; __u32 protocol; if (rf_protocol == NCI_RF_PROTOCOL_T1T) @@ -274,7 +279,7 @@ static int nci_add_new_protocol(struct nci_dev *ndev, } static void nci_add_new_target(struct nci_dev *ndev, - struct nci_rf_discover_ntf *ntf) + const struct nci_rf_discover_ntf *ntf) { struct nfc_target *target; int i, rc; @@ -319,10 +324,10 @@ void nci_clear_target_list(struct nci_dev *ndev) } static void nci_rf_discover_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_rf_discover_ntf ntf; - __u8 *data = skb->data; + const __u8 *data = skb->data; bool add_target = true; ntf.rf_discovery_id = *data++; @@ -382,7 +387,8 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev, } static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf, __u8 *data) + struct nci_rf_intf_activated_ntf *ntf, + const __u8 *data) { struct activation_params_nfca_poll_iso_dep *nfca_poll; struct activation_params_nfcb_poll_iso_dep *nfcb_poll; @@ -418,7 +424,8 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, } static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf, __u8 *data) + struct nci_rf_intf_activated_ntf *ntf, + const __u8 *data) { struct activation_params_poll_nfc_dep *poll; struct activation_params_listen_nfc_dep *listen; @@ -454,7 +461,7 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, } static void nci_target_auto_activated(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf) + const struct nci_rf_intf_activated_ntf *ntf) { struct nfc_target *target; int rc; @@ -477,7 +484,7 @@ static void nci_target_auto_activated(struct nci_dev *ndev, } static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf) + const struct nci_rf_intf_activated_ntf *ntf) { ndev->remote_gb_len = 0; @@ -519,11 +526,11 @@ 
static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, } static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; - __u8 *data = skb->data; + const __u8 *data = skb->data; int err = NCI_STATUS_OK; ntf.rf_discovery_id = *data++; @@ -681,10 +688,10 @@ listen: } static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_conn_info *conn_info; - struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; + const struct nci_conn_info *conn_info; + const struct nci_rf_deactivate_ntf *ntf = (void *)skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); @@ -725,10 +732,10 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, } static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { u8 status = NCI_STATUS_OK; - struct nci_nfcee_discover_ntf *nfcee_ntf = + const struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; pr_debug("\n"); @@ -745,7 +752,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, } static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { pr_debug("\n"); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index e9605922a322..a2e72c003805 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -25,9 +25,10 @@ /* Handle NCI Response packets */ -static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_core_reset_rsp_packet(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_reset_rsp *rsp = (void *) skb->data; + const struct nci_core_reset_rsp *rsp = (void *)skb->data; pr_debug("status 0x%x\n", rsp->status); @@ -43,10 +44,11 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) } } -static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb) +static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data; - struct nci_core_init_rsp_2 *rsp_2; + const struct nci_core_init_rsp_1 *rsp_1 = (void *)skb->data; + const struct nci_core_init_rsp_2 *rsp_2; pr_debug("status 0x%x\n", rsp_1->status); @@ -81,10 +83,11 @@ static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb) return NCI_STATUS_OK; } -static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb) +static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data; - u8 *supported_rf_interface = rsp->supported_rf_interfaces; + const struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data; + const u8 *supported_rf_interface = rsp->supported_rf_interfaces; u8 rf_interface_idx = 0; u8 rf_extension_cnt = 0; @@ -118,7 +121,7 @@ static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb) return NCI_STATUS_OK; } -static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_core_init_rsp_packet(struct nci_dev *ndev, const struct sk_buff *skb) { u8 status = 0; @@ -160,9 +163,9 @@ exit: } static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct 
nci_core_set_config_rsp *rsp = (void *) skb->data; + const struct nci_core_set_config_rsp *rsp = (void *)skb->data; pr_debug("status 0x%x\n", rsp->status); @@ -170,7 +173,7 @@ static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, } static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -179,9 +182,10 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } -static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_conn_info *conn_info; + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); @@ -210,7 +214,7 @@ exit: } static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -222,7 +226,7 @@ static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev, } static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -238,9 +242,9 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, } static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_nfcee_discover_rsp *discover_rsp; + const struct nci_nfcee_discover_rsp *discover_rsp; if (skb->len != 2) { nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); @@ -255,7 +259,7 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, } static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -264,11 +268,11 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, } static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; struct nci_conn_info *conn_info = NULL; - struct nci_core_conn_create_rsp *rsp; + const struct nci_core_conn_create_rsp *rsp; pr_debug("status 0x%x\n", status); @@ -319,7 +323,7 @@ exit: } static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_conn_info *conn_info; __u8 status = skb->data[0]; diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 7d8e10e27c20..0935527d1d12 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -27,7 +27,7 @@ #define CRC_INIT 0xFFFF -static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb, +static int __nci_spi_send(struct nci_spi *nspi, const struct sk_buff *skb, int cs_change) { struct spi_message m; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 722f7ef891e1..49089c50872e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -530,7 +530,7 @@ free_msg: int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) { - struct nfc_se *se; + const struct nfc_se *se; struct sk_buff *msg; void *hdr; @@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; - struct nfc_vendor_cmd *cmd; + const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len;
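The net/nfc changes above are a single theme: NCI response and notification handlers that only read the packet now take a const struct sk_buff * and walk the payload through a const cursor, so an accidental store into the receive buffer becomes a compile-time error instead of silent corruption. A minimal sketch of the parsing pattern, in plain C with illustrative names (only the dsfid/fmt fields are taken from the nfcv poll helper above):

/* Read-only buffer walk: the advanced cursor is returned to the
 * caller, mirroring the nci_extract_* helpers above. */
struct nfcv_params {
	unsigned char dsfid;
	unsigned char fmt;
};

static const unsigned char *extract_nfcv(struct nfcv_params *p,
					 const unsigned char *data)
{
	++data;			/* skip a leading byte, as the helper does */
	p->dsfid = *data++;	/* loads are fine; a store would not build */
	p->fmt = *data++;
	return data;
}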
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 889fefd64e56..de2ec66d7e83 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ef15d9eb4774..076774034bb9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -924,7 +924,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.portid = nla_get_u32(a); + if (dp->user_features & + OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = + ovs_dp_get_upcall_portid(dp, + smp_processor_id()); + else + upcall.portid = nla_get_u32(a); break; case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bc164b35e67d..67ad08320886 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, static void ovs_dp_masks_rebalance(struct work_struct *work); +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *); + /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) free_percpu(dp->stats_percpu); kfree(dp->ports); ovs_meters_exit(dp); + kfree(rcu_dereference_raw(dp->upcall_portids)); kfree(dp); } @@ -239,7 +242,13 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.portid = ovs_vport_find_upcall_portid(p, skb); + + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = + ovs_dp_get_upcall_portid(dp, smp_processor_id()); + else + upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); if (unlikely(error)) @@ -1594,16 +1603,70 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); +static int ovs_dp_set_upcall_portids(struct datapath *dp, + const struct nlattr *ids) +{ + struct dp_nlsk_pids *old, *dp_nlsk_pids; + + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(dp->upcall_portids); + + dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids), + GFP_KERNEL); + if (!dp_nlsk_pids) + return -ENOMEM; + + dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32); + nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids)); + + rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids); + + kfree_rcu(old, rcu); + + return 0; +} + +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) +{ + struct dp_nlsk_pids *dp_nlsk_pids; + + dp_nlsk_pids = rcu_dereference(dp->upcall_portids); + + if (dp_nlsk_pids) { + if (cpu_id < dp_nlsk_pids->n_pids) { + return dp_nlsk_pids->pids[cpu_id]; + } else if (dp_nlsk_pids->n_pids > 0 && + cpu_id >= dp_nlsk_pids->n_pids) { + /* If the number of netlink PIDs is mismatched with + * the number of CPUs as seen by the kernel, log this + * and send the upcall to the socket at cpu_id modulo + * the number of sockets so packets are not dropped + */ + 
pr_info_ratelimited("cpu_id mismatch with handler threads"); + return dp_nlsk_pids->pids[cpu_id % + dp_nlsk_pids->n_pids]; + } else { + return 0; + } + } else { + return 0; + } +} + static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { u32 user_features = 0; + int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); if (user_features & ~(OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED | - OVS_DP_F_TC_RECIRC_SHARING)) + OVS_DP_F_TC_RECIRC_SHARING | + OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) return -EOPNOTSUPP; #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) @@ -1624,6 +1687,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) dp->user_features = user_features; + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && + a[OVS_DP_ATTR_PER_CPU_PIDS]) { + /* Upcall Netlink Port IDs have been updated */ + err = ovs_dp_set_upcall_portids(dp, + a[OVS_DP_ATTR_PER_CPU_PIDS]); + if (err) + return err; + } + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) static_branch_enable(&tc_recirc_sharing_support); else diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 38f7d3e66ca6..fcfe6cb46441 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -51,6 +51,21 @@ struct dp_stats_percpu { }; /** + * struct dp_nlsk_pids - array of netlink portids of for a datapath. + * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU + * is enabled and must be protected by rcu. + * @rcu: RCU callback head for deferred destruction. + * @n_pids: Size of @pids array. + * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets + * that miss the flow table. + */ +struct dp_nlsk_pids { + struct rcu_head rcu; + u32 n_pids; + u32 pids[]; +}; + +/** * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. @@ -61,6 +76,7 @@ struct dp_stats_percpu { * @net: Reference to net namespace. * @max_headroom: the maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -87,6 +103,8 @@ struct datapath { /* Switch meters. 
@@ -87,6 +103,8 @@ struct datapath { /* Switch meters. */ struct dp_meter_table meter_tbl; + + struct dp_nlsk_pids __rcu *upcall_portids; }; /** @@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct sw_flow_key *, const struct dp_upcall_info *, uint32_t cutlen); +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id); + const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, u32 portid, u32 seq, u8 cmd); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 57a1971f29e5..543365f58e97 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po) rcu_read_lock(); dev = rcu_dereference(po->cached_dev); - if (likely(dev)) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; @@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) out_free: kfree_skb(skb); out_unlock: - if (dev) - dev_put(dev); + dev_put(dev); out: return err; } @@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } } - if (dev) - dev_hold(dev); + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, packet_cached_dev_assign(po, dev); } } - if (dev_curr) - dev_put(dev_curr); + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; @@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index ca6ae4c59433..65218b7ce9f9 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, drop: kfree_skb(skb); - if (dev) - dev_put(dev); + dev_put(dev); return err; } EXPORT_SYMBOL(pn_skb_send); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index ac0fae06cc15..cde671d29d5d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net) break; dev = NULL; } - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -233,11 +232,11 @@ static int phonet_device_autoconf(struct net_device *dev) struct if_phonet_req req; int ret; - if (!dev->netdev_ops->ndo_do_ioctl) + if (!dev->netdev_ops->ndo_siocdevprivate) return -EOPNOTSUPP; - ret = dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *)&req, - SIOCPNGAUTOCONF); + ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req, + NULL, SIOCPNGAUTOCONF); if (ret < 0) return ret; @@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr) daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); if (!dev) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 2599235d592e..71e2caf6ab85 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, saddr = PN_NO_ADDR; release_sock(sk); - if (dev) - dev_put(dev); + dev_put(dev); if (saddr == PN_NO_ADDR) return -EHOSTUNREACH;
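Every af_packet and phonet hunk above deletes the same two-line guard: with this series dev_hold() and dev_put() accept a NULL pointer and do nothing, in the spirit of kfree(NULL), so the "if (dev)" checks around refcounting are dead weight. The shape of the cleanup, as a kernel-context sketch (hypothetical wrapper names):

/* before: each caller guarded the refcount call */
static void put_dev_old(struct net_device *dev)
{
	if (dev)
		dev_put(dev);
}

/* after: dev_put(NULL) is a no-op, so the guard folds away */
static void put_dev_new(struct net_device *dev)
{
	dev_put(dev);
}

The phonet_device_autoconf() change in the same range is separate housekeeping: SIOCPNGAUTOCONF sits in the SIOCDEVPRIVATE number range, so it moves from the generic ndo_do_ioctl() hook to the new ndo_siocdevprivate() one.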
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 171b7f3be6ef..6c61b7b1838f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1157,14 +1157,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: - if (copy_from_user(&ifr, argp, sizeof(ifr))) { + if (get_user_ifreq(&ifr, NULL, argp)) { rc = -EFAULT; break; } sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; *sq = ipc->us; - if (copy_to_user(argp, &ifr, sizeof(ifr))) { + if (put_user_ifreq(&ifr, argp)) { rc = -EFAULT; break; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d17a66aab8ee..7dd3a2dc5fa4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; - p->tcfa_flags = flags; + p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, @@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[]) } } -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack) { @@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct nlattr *kind; int err; - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, return ERR_PTR(err); } } else { - if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { + if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } @@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { - struct nla_bitfield32 flags = { 0, 0 }; + bool police = flags & TCA_ACT_FLAGS_POLICE; + struct nla_bitfield32 userflags = { 0, 0 }; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_cookie *cookie = NULL;
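The act_api.c rework above collapses the long parameter tail (char *name, int ovr, int bind, bool rtnl_held) into one u32 flags word: TCA_ACT_FLAGS_POLICE stands in for the "police" name, TCA_ACT_FLAGS_REPLACE for ovr, TCA_ACT_FLAGS_BIND for bind, and TCA_ACT_FLAGS_NO_RTNL for !rtnl_held, with TCA_ACT_FLAGS_USER_MASK keeping the netlink-visible bits apart when tcfa_flags is stored. Each act_*_init() in the hunks that follow decodes the word the same way; a schematic of that tail logic, with made-up flag values (the real bit assignments are not part of this excerpt):

#define EX_FLAGS_BIND		(1U << 16)	/* illustrative bits only */
#define EX_FLAGS_REPLACE	(1U << 17)

/* exists != 0: an action with the requested index is already installed */
static int example_init_tail(unsigned int flags, int exists)
{
	if (!exists)
		return 1;			/* ACT_P_CREATED is 1 */
	if (flags & EX_FLAGS_BIND)		/* don't override defaults */
		return 0;
	if (!(flags & EX_FLAGS_REPLACE))
		return -17;			/* -EEXIST */
	return 0;				/* replace the existing action */
}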
@@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; /* backward compatibility for policer */ - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) - flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); - err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, tp, flags.value, extack); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, + userflags.value | flags, extack); } else { - err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - tp, flags.value, extack); + err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, + extack); } if (err < 0) goto err_out; *init_res = err; - if (!name && tb[TCA_ACT_COOKIE]) + if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); - if (!name) + if (!police) a->hw_stats = hw_stats; return a; @@ -1063,9 +1062,9 @@ err_out: /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, - struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *est, struct tc_action *actions[], + int init_res[], size_t *attr_size, u32 flags, + struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack); + a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; @@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - ops[i - 1], &init_res[i - 1], rtnl_held, - extack); + act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], + &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, goto err_mod; err: - tcf_action_destroy(actions, bind); + tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { if (ops[i]) @@ -1351,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, module_put(ops->owner); err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - return 0; if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); @@ -1423,8 +1421,6 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (ret > 0) - return 0; return ret; } @@ -1481,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; - int err = 0; skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? 
NLMSG_GOODSIZE : attr_size, GFP_KERNEL); @@ -1495,15 +1490,12 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], return -EINVAL; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; - return err; + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid, int ovr, + struct nlmsghdr *n, u32 portid, u32 flags, struct netlink_ext_ack *extack) { size_t attr_size = 0; @@ -1512,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, - actions, init_res, &attr_size, true, extack); + ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, + &attr_size, flags, extack); if (ret != -EAGAIN) break; } @@ -1543,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = NETLINK_CB(skb).portid; - int ret = 0, ovr = 0; + u32 flags = 0; + int ret = 0; if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) @@ -1569,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, * is zero) then just set this */ if (n->nlmsg_flags & NLM_F_REPLACE) - ovr = 1; - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, + flags = TCA_ACT_FLAGS_REPLACE; + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags, extack); break; case RTM_DELACTION: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index e409a0005717..040807aa15b9 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; @@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); return -EEXIST; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index e19885d7fe2c..94e78ac7a748 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; @@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(*a); if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4fa4fcb842ba..a15ec95e69c3 
100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -41,11 +41,12 @@ static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 1b4b3514c94f..ad9df0cb4b98 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net, static int tcf_ct_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ct_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b20c8ce59905..549374a2d008 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) /* don't override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 73c3926358a0..d8dce173df37 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; @@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff 
--git a/net/sched/act_gate.c b/net/sched/act_gate.c index a78cb7965718..7df72a4197a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gate_net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; u64 cycletime = 0, basetime = 0; @@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a2ddea04183a..7064a365a1a9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, kfree(p); return err; } - err = load_metalist(tb2, rtnl_held); + err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) { kfree(p); return err; @@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); kfree(p); return -EEXIST; @@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = populate_metalist(ife, tb2, exists, rtnl_held); + err = populate_metalist(ife, tb2, exists, + !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) goto metadata_parse_err; } else { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ac7297f42355..265b1443e252 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind, + const struct tc_action_ops *ops, struct tcf_proto *tp, u32 flags) { struct tc_action_net *tn = net_generic(net, id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } @@ -201,21 +202,21 @@ err1: } static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return 
__tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, + tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, + tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 7153c67f641e..37f51d778728 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - if (dev) - dev_put(dev); + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; @@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } @@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(dev); dev = rcu_replace_pointer(m->tcfm_dev, dev, lockdep_is_held(&m->tcf_lock)); - if (dev) - dev_put(dev); + dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index d1486ea496a2..e4529b428cf4 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mpls_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_mpls_params *p; @@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 1ebd2a86d980..7dd6b586ba7f 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { }; static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, - struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct tcf_proto *tp, + struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct 
tc_action_net *tn = net_generic(net, nat_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; @@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else if (err > 0) { if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b45304446e13..c6c862c459cc 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -136,11 +136,11 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; @@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) goto out_free; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0fab8de176d2..832157a840fc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_police *parm; @@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; spin_lock_init(&(to_police(*a)->tcfp_lock)); - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 6a0c16e4351d..230501eb9e06 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { }; static int tcf_sample_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; u32 psample_group_num, rate, index; @@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 726cc956d06f..cbbe1861d3a2 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 
flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; @@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e5f3fb8b00e3..605418538347 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - if (!ovr) { + if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8d17a543cc9f..ecb9ee666095 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> +#include <net/inet_ecn.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -21,15 +22,13 @@ static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; -#define MAX_EDIT_LEN ETH_HLEN static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); - int action; + int action, max_edit_len, err; struct tcf_skbmod_params *p; u64 flags; - int err; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); @@ -38,19 +37,34 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, if (unlikely(action == TC_ACT_SHOT)) goto drop; - if (!skb->dev || skb->dev->type != ARPHRD_ETHER) - return action; + max_edit_len = skb_mac_header_len(skb); + p = rcu_dereference_bh(d->skbmod_p); + flags = p->flags; + + /* tcf_skbmod_init() guarantees "flags" to be one of the following: + * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE} + * 2. SKBMOD_F_SWAPMAC + * 3. SKBMOD_F_ECN + * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet + * packets. 
+ */ + if (flags == SKBMOD_F_ECN) { + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + case cpu_to_be16(ETH_P_IPV6): + max_edit_len += skb_network_header_len(skb); + break; + default: + goto out; + } + } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) { + goto out; + } - /* XXX: if you are going to edit more fields beyond ethernet header - * (example when you add IP header replacement or vlan swap) - * then MAX_EDIT_LEN needs to change appropriately - */ - err = skb_ensure_writable(skb, MAX_EDIT_LEN); + err = skb_ensure_writable(skb, max_edit_len); if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; - p = rcu_dereference_bh(d->skbmod_p); - flags = p->flags; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) @@ -66,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); } + if (flags & SKBMOD_F_ECN) + INET_ECN_set_ce(skb); + +out: return action; drop: @@ -82,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); + bool ovr = flags & TCA_ACT_FLAGS_REPLACE; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; @@ -129,6 +148,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; + if (parm->flags & SKBMOD_F_ECN) + lflags = SKBMOD_F_ECN; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0)
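The act_skbmod change above drops the fixed MAX_EDIT_LEN (ETH_HLEN) cap: MAC edits still only need skb_mac_header_len(), but the new SKBMOD_F_ECN mode writes the CE codepoint into the IP header, so a pure-ECN action on IPv4/IPv6 adds skb_network_header_len() before calling skb_ensure_writable(). A condensed model of that per-packet decision (helper results passed in as plain ints; the flag value is illustrative):

enum { F_ECN = 1 };	/* stands in for SKBMOD_F_ECN */

/* Returns the length that must be writable, or -1 to skip the action,
 * mirroring the max_edit_len logic in tcf_skbmod_act() above. */
static int writable_len(unsigned int flags, int is_ipv4_or_ipv6,
			int is_ether_dev, int mac_len, int net_len)
{
	if (flags == F_ECN) {
		if (!is_ipv4_or_ipv6)
			return -1;	/* ECN marking needs an IP header */
		return mac_len + net_len;
	}
	if (!is_ether_dev)
		return -1;		/* MAC edits need an Ethernet device */
	return mac_len;
}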
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 85c0d0d5b9da..d9cd174eecb7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; @@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; goto release_tun_meta; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 71f2015c70ca..e4dc5a555bd8 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool push_prio_exists = false; @@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e3e79e9bd706..69185e311422 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1577,21 +1577,11 @@ reset: #endif } -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, +int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - u32 last_executed_chain = 0; - - return __tcf_classify(skb, tp, tp, res, compat_mode, - &last_executed_chain); -} -EXPORT_SYMBOL(tcf_classify); - -int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) -{ #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; @@ -1603,20 +1593,22 @@ int tcf_classify_ingress(struct sk_buff *skb, struct tc_skb_ext *ext; int ret; - ext = skb_ext_find(skb, TC_SKB_EXT); + if (block) { + ext = skb_ext_find(skb, TC_SKB_EXT); - if (ext && ext->chain) { - struct tcf_chain *fchain; + if (ext && ext->chain) { + struct tcf_chain *fchain; - fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain); - if (!fchain) - return TC_ACT_SHOT; + fchain = tcf_chain_lookup_rcu(block, ext->chain); + if (!fchain) + return TC_ACT_SHOT; - /* Consume, so cloned/redirect skbs won't inherit ext */ - skb_ext_del(skb, TC_SKB_EXT); + /* Consume, so cloned/redirect skbs won't inherit ext */ + skb_ext_del(skb, TC_SKB_EXT); - tp = rcu_dereference_bh(fchain->filter_chain); - last_executed_chain = fchain->index; + tp = rcu_dereference_bh(fchain->filter_chain); + last_executed_chain = fchain->index; + } } ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, @@ -1635,7 +1627,7 @@ int tcf_classify_ingress(struct sk_buff *skb, return ret; #endif } -EXPORT_SYMBOL(tcf_classify_ingress); +EXPORT_SYMBOL(tcf_classify); struct tcf_chain_info { struct tcf_proto __rcu **pprev; @@ -1870,13 +1862,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - - if (err > 0) - err = 0; return err; } @@ -1909,15 +1898,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); - if (err > 0) - err = 0; return err; } @@ -1962,6 +1949,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; int tp_created; bool rtnl_held = false; + u32 flags = 0; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -2125,9 +2113,12 @@ replay: goto errout; } + if (!(n->nlmsg_flags & NLM_F_CREATE)) + flags |= TCA_ACT_FLAGS_REPLACE; + if (!rtnl_held) + flags |= TCA_ACT_FLAGS_NO_RTNL; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, - n->nlmsg_flags & NLM_F_CREATE ? 
TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - rtnl_held, extack); + flags, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held); @@ -2711,13 +2702,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); - if (err > 0) - err = 0; return err; } @@ -2741,7 +2730,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + return rtnl_unicast(skb, net, portid); return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } @@ -3035,8 +3024,8 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -3047,13 +3036,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack); + a_o = tc_action_load_ops(tb[exts->police], true, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); + flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; act = tcf_action_init_1(net, tp, tb[exts->police], - rate_tlv, "police", ovr, - TCA_ACT_BIND, a_o, init_res, - rtnl_held, extack); + rate_tlv, a_o, init_res, flags, + extack); module_put(a_o->owner); if (IS_ERR(act)) return PTR_ERR(act); @@ -3065,10 +3056,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; + flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], - rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, init_res, - &attr_size, rtnl_held, extack); + rate_tlv, exts->actions, init_res, + &attr_size, flags, extack); if (err < 0) return err; exts->nr_actions = err; @@ -3832,7 +3823,7 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru fl = rcu_dereference_bh(qe->filter_chain); - switch (tcf_classify(skb, fl, &cl_res, false)) { + switch (tcf_classify(skb, NULL, fl, &cl_res, false)) { case TC_ACT_SHOT: qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f256a7c69093..8158fc9ee1ab 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, 
void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, + u32 flags, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, goto errout; } - err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags, extack); if (err < 0) { if (!fold) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fa739efa59f4..3b472bafdc9d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, - struct nlattr **tb, struct nlattr *est, bool ovr, + struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { bool is_bpf, is_ebpf, have_exts = false; @@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags, extack); if (ret < 0) return ret; @@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; prog->handle = handle; - ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr, + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags, extack); if (ret < 0) goto errout_idr; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index fb881144fa01..ed00001b528a 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, + extack); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 87398af2715a..972303aa8edd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); @@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err2; - err = tcf_exts_validate(net, tp, tb, 
tca[TCA_RATE], &fnew->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, + extack); if (err < 0) goto err2; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d7869a984881..23b21253b3c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1915,23 +1915,22 @@ errout_cleanup: static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, bool rtnl_held, + struct nlattr *est, + struct fl_flow_tmplt *tmplt, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, - extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_lock(); tcf_bind_filter(tp, &f->res, base); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_unlock(); } @@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_fl_head *head = fl_head_dereference(tp); + bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; @@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, rtnl_held, extack); + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], + tp->chain->tmplt_priv, flags, extack); if (err) goto errout; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ec945294626a..8654b0ce997c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, - struct nlattr **tca, unsigned long base, bool ovr, + struct nlattr **tca, unsigned long base, u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags, + extack); if (err < 0) return err; @@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = 
fw_set_parms(net, tp, f, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index cafb84480bab..24f0046ce0b3 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, - extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack); if (err < 0) return err; @@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; - u32 flags = 0; + u32 userflags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_MATCHALL_FLAGS]) { - flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); - if (!tc_flags_valid(flags)) + userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); + if (!tc_flags_valid(userflags)) return -EINVAL; } @@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!handle) handle = 1; new->handle = handle; - new->flags = flags; + new->flags = userflags; new->pf = alloc_percpu(struct tc_matchall_pcnt); if (!new->pf) { err = -ENOMEM; goto err_alloc_percpu; } - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags, extack); if (err) goto err_set_parms; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 5efa3e7ace15..a35ab8c27866 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, struct nlattr **tb, struct nlattr *est, int new, - bool ovr, struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; @@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } err = route4_set_parms(net, tp, base, f, handle, head, tb, - 
tca[TCA_RATE], new, ovr, extack); + tca[TCA_RATE], new, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 27a4b6dbcf57..5cd9d6b143c4 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + u32 handle, struct nlattr **tca, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); @@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags, extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e9a8a2c86bbd..742c7d49a958 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -330,7 +330,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_data *cp = NULL, *oldp; @@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack); if (err < 0) goto errout; @@ -529,8 +529,8 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); } static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6e1abe805448..4272814487f0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack); if (err < 0) return err; @@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, + struct 
nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct tc_u32_sel *s; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; - u32 htid, flags = 0; + u32 htid, userflags = 0; size_t sel_size; int err; @@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_U32_FLAGS]) { - flags = nla_get_u32(tb[TCA_U32_FLAGS]); - if (!tc_flags_valid(flags)) { + userflags = nla_get_u32(tb[TCA_U32_FLAGS]); + if (!tc_flags_valid(userflags)) { NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; } @@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if ((n->flags ^ flags) & + if ((n->flags ^ userflags) & ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; @@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOMEM; err = u32_set_parms(net, tp, base, new, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); if (err) { u32_destroy_key(new, false); @@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); - ht->flags = flags; + ht->flags = userflags; - err = u32_replace_hw_hnode(tp, ht, flags, extack); + err = u32_replace_hw_hnode(tp, ht, userflags, extack); if (err) { idr_remove(&tp_c->handle_idr, handle); kfree(ht); @@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; - n->flags = flags; + n->flags = userflags; err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) @@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr, + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags, extack); if (err == 0) { struct tc_u_knode __rcu **ins; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f87d07736a14..5e90e9b160e3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; - int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; - return err; + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tclass_del_notify(struct net *net, @@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net, err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; return err; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d0c9a57398fc..7d8518176b45 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -394,7 +394,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tcf_classify(skb, fl, &res, true); + result = tcf_classify(skb, NULL, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 951542843cab..ecc5c4d93779 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1665,7 +1665,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, goto hash; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b79a7e27bb31..2dabaffd39d0 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -228,7 +228,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. 
*/ - result = tcf_classify(skb, fl, &res, true); + result = tcf_classify(skb, NULL, fl, &res, true); if (!fl || result < 0) goto fallback; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index fc1e47069593..642cd179b7a7 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -317,7 +317,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index d320bcfb2da2..4c100d105269 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -242,7 +242,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, fl, &res, false); + int result = tcf_classify(skb, NULL, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c1e84d1eeaba..925924fab1ab 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -390,7 +390,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index bbd5f8753600..c4afdd026f51 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -91,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index cac684952edc..830f3559f727 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -94,7 +94,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_pie_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index bf0034c66e35..b7ac30cca035 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1130,7 +1130,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5f7ac27a5264..81ea8332547a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -238,7 +238,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && 
(result = tcf_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 5c27b4270b90..e282e7382117 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -36,7 +36,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3eabb871a1d5..03fdf31ccb6a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -39,7 +39,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b692a0de1ad5..58a9d42b52b8 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -690,7 +690,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index dde829d4b9f8..3d061a13d7ed 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -257,7 +257,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 066754a18569..f8e569f79f13 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 7d7ba0320d5a..a8845343d183 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) if (!libdev->ops.get_netdev) continue; lndev = libdev->ops.get_netdev(libdev, i + 1); - if (lndev) - dev_put(lndev); + dev_put(lndev); if (lndev != ndev) continue; if (event == NETDEV_REGISTER) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 6f6d33edb135..4a964e9190b0 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, return 0; out_put: - if (ndev) - dev_put(ndev); + dev_put(ndev); return rc; } diff --git a/net/socket.c b/net/socket.c index 0b2dad3bdf7f..3c10504e46d9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -212,6 +212,7 @@ static const char * const pf_family_names[] = { [PF_QIPCRTR] = "PF_QIPCRTR", [PF_SMC] = "PF_SMC", 
[PF_XDP] = "PF_XDP", + [PF_MCTP] = "PF_MCTP", }; /* @@ -1064,9 +1065,13 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); +static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br, + unsigned int cmd, struct ifreq *ifr, + void __user *uarg); -void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) +void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, + unsigned int cmd, struct ifreq *ifr, + void __user *uarg)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -1074,6 +1079,22 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) } EXPORT_SYMBOL(brioctl_set); +int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, + struct ifreq *ifr, void __user *uarg) +{ + int err = -ENOPKG; + + if (!br_ioctl_hook) + request_module("bridge"); + + mutex_lock(&br_ioctl_mutex); + if (br_ioctl_hook) + err = br_ioctl_hook(net, br, cmd, ifr, uarg); + mutex_unlock(&br_ioctl_mutex); + + return err; +} + static DEFINE_MUTEX(vlan_ioctl_mutex); static int (*vlan_ioctl_hook) (struct net *, void __user *arg); @@ -1088,8 +1109,11 @@ EXPORT_SYMBOL(vlan_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, unsigned int cmd, unsigned long arg) { + struct ifreq ifr; + bool need_copyout; int err; void __user *argp = (void __user *)arg; + void __user *data; err = sock->ops->ioctl(sock, cmd, arg); @@ -1100,25 +1124,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (err != -ENOIOCTLCMD) return err; - if (cmd == SIOCGIFCONF) { - struct ifconf ifc; - if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) - return -EFAULT; - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); - rtnl_unlock(); - if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) - err = -EFAULT; - } else { - struct ifreq ifr; - bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, &data, argp)) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); + if (!err && need_copyout) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); - if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } + return err; } @@ -1140,12 +1152,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) net = sock_net(sk); if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { struct ifreq ifr; + void __user *data; bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, &data, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; } else #ifdef CONFIG_WEXT_CORE @@ -1170,14 +1183,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: - err = -ENOPKG; - if (!br_ioctl_hook) - request_module("bridge"); - - mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) - err = br_ioctl_hook(net, cmd, argp); - mutex_unlock(&br_ioctl_mutex); + err = br_ioctl_call(net, NULL, cmd, NULL, argp); break; case SIOCGIFVLAN: case SIOCSIFVLAN: @@ -1217,6 +1223,11 @@ static long sock_ioctl(struct file 
*file, unsigned cmd, unsigned long arg) cmd == SIOCGSTAMP_NEW, false); break; + + case SIOCGIFCONF: + err = dev_ifconf(net, argp); + break; + default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -3126,154 +3137,55 @@ void socket_seq_show(struct seq_file *seq) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_COMPAT -static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +/* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg) { - struct compat_ifconf ifc32; - struct ifconf ifc; - int err; + if (in_compat_syscall()) { + struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr; - if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) - return -EFAULT; + memset(ifr, 0, sizeof(*ifr)); + if (copy_from_user(ifr32, arg, sizeof(*ifr32))) + return -EFAULT; - ifc.ifc_len = ifc32.ifc_len; - ifc.ifc_req = compat_ptr(ifc32.ifcbuf); + if (ifrdata) + *ifrdata = compat_ptr(ifr32->ifr_data); - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); - rtnl_unlock(); - if (err) - return err; + return 0; + } - ifc32.ifc_len = ifc.ifc_len; - if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) + if (copy_from_user(ifr, arg, sizeof(*ifr))) return -EFAULT; + if (ifrdata) + *ifrdata = ifr->ifr_data; + return 0; } +EXPORT_SYMBOL(get_user_ifreq); -static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) +int put_user_ifreq(struct ifreq *ifr, void __user *arg) { - struct compat_ethtool_rxnfc __user *compat_rxnfc; - bool convert_in = false, convert_out = false; - size_t buf_size = 0; - struct ethtool_rxnfc __user *rxnfc = NULL; - struct ifreq ifr; - u32 rule_cnt = 0, actual_rule_cnt; - u32 ethcmd; - u32 data; - int ret; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - compat_rxnfc = compat_ptr(data); - - if (get_user(ethcmd, &compat_rxnfc->cmd)) - return -EFAULT; + size_t size = sizeof(*ifr); - /* Most ethtool structures are defined without padding. - * Unfortunately struct ethtool_rxnfc is an exception. - */ - switch (ethcmd) { - default: - break; - case ETHTOOL_GRXCLSRLALL: - /* Buffer size is variable */ - if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) - return -EFAULT; - if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) - return -ENOMEM; - buf_size += rule_cnt * sizeof(u32); - fallthrough; - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_SRXCLSRLINS: - convert_out = true; - fallthrough; - case ETHTOOL_SRXCLSRLDEL: - buf_size += sizeof(struct ethtool_rxnfc); - convert_in = true; - rxnfc = compat_alloc_user_space(buf_size); - break; - } + if (in_compat_syscall()) + size = sizeof(struct compat_ifreq); - if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (copy_to_user(arg, ifr, size)) return -EFAULT; - ifr.ifr_data = convert_in ? 
rxnfc : (void __user *)compat_rxnfc; - - if (convert_in) { - /* We expect there to be holes between fs.m_ext and - * fs.ring_cookie and at the end of fs, but nowhere else. - */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + - sizeof(compat_rxnfc->fs.m_ext) != - offsetof(struct ethtool_rxnfc, fs.m_ext) + - sizeof(rxnfc->fs.m_ext)); - BUILD_BUG_ON( - offsetof(struct compat_ethtool_rxnfc, fs.location) - - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != - offsetof(struct ethtool_rxnfc, fs.location) - - offsetof(struct ethtool_rxnfc, fs.ring_cookie)); - - if (copy_in_user(rxnfc, compat_rxnfc, - (void __user *)(&rxnfc->fs.m_ext + 1) - - (void __user *)rxnfc) || - copy_in_user(&rxnfc->fs.ring_cookie, - &compat_rxnfc->fs.ring_cookie, - (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie)) - return -EFAULT; - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - if (put_user(rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - } else if (copy_in_user(&rxnfc->rule_cnt, - &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - } - - ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); - if (ret) - return ret; - - if (convert_out) { - if (copy_in_user(compat_rxnfc, rxnfc, - (const void __user *)(&rxnfc->fs.m_ext + 1) - - (const void __user *)rxnfc) || - copy_in_user(&compat_rxnfc->fs.ring_cookie, - &rxnfc->fs.ring_cookie, - (const void __user *)(&rxnfc->fs.location + 1) - - (const void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - /* As an optimisation, we only copy the actual - * number of rules that the underlying - * function returned. Since Mallory might - * change the rule count in user memory, we - * check that it is less than the rule count - * originally given (as the user buffer size), - * which has been range-checked. 
- */ - if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - if (actual_rule_cnt < rule_cnt) - rule_cnt = actual_rule_cnt; - if (copy_in_user(&compat_rxnfc->rule_locs[0], - &rxnfc->rule_locs[0], - rule_cnt * sizeof(u32))) - return -EFAULT; - } - } - return 0; } +EXPORT_SYMBOL(put_user_ifreq); +#ifdef CONFIG_COMPAT static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { compat_uptr_t uptr32; @@ -3281,7 +3193,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 void __user *saved; int err; - if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) + if (get_user_ifreq(&ifr, NULL, uifr32)) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) @@ -3290,10 +3202,10 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL); if (!err) { ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; - if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + if (put_user_ifreq(&ifr, uifr32)) err = -EFAULT; } return err; @@ -3304,97 +3216,13 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq ifreq; - u32 data32; + void __user *data; - if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) - return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_data)) + if (get_user_ifreq(&ifreq, &data, u_ifreq32)) return -EFAULT; - ifreq.ifr_data = compat_ptr(data32); + ifreq.ifr_data = data; - return dev_ioctl(net, cmd, &ifreq, NULL); -} - -static int compat_ifreq_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - /* Handle the fact that while struct ifreq has the same *layout* on - * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, - * which are handled elsewhere, it still has different *size* due to - * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, - * resulting in struct ifreq being 32 and 40 bytes respectively). - * As a result, if the struct happens to be at the end of a page and - * the next page isn't readable/writable, we get a fault. To prevent - * that, copy back and forth to the full size. 
- */ - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCGIFNAME: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; -} - -static int compat_sioc_ifmap(struct net *net, unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq ifr; - struct compat_ifmap __user *uifmap32; - int err; - - uifmap32 = &uifr32->ifr_ifru.ifru_map; - err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= get_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - return -EFAULT; - - err = dev_ioctl(net, cmd, &ifr, NULL); - - if (cmd == SIOCGIFMAP && !err) { - err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= put_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - err = -EFAULT; - } - return err; + return dev_ioctl(net, cmd, &ifreq, data, NULL); } /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE @@ -3420,21 +3248,14 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return compat_ifr_data_ioctl(net, cmd, argp); + return sock_ioctl(file, cmd, (unsigned long)argp); switch (cmd) { case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFCONF: - return compat_dev_ifconf(net, argp); - case SIOCETHTOOL: - return ethtool_ioctl(net, argp); case SIOCWANDEV: return compat_siocwandev(net, argp); - case SIOCGIFMAP: - case SIOCSIFMAP: - return compat_sioc_ifmap(net, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) @@ -3442,6 +3263,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, !COMPAT_USE_64BIT_TIME); + case SIOCETHTOOL: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: @@ -3459,10 +3281,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGSKNS: case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: + case SIOCGIFCONF: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: case SIOCSIFFLAGS: + case SIOCGIFMAP: + case SIOCSIFMAP: case SIOCGIFMETRIC: case SIOCSIFMETRIC: case SIOCGIFMTU: @@ -3499,8 +3324,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - return compat_ifreq_ioctl(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: diff --git 
a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 070698dd19bc..0b2c18efc079 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -378,6 +378,266 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); +struct switchdev_nested_priv { + bool (*check_cb)(const struct net_device *dev); + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev); + const struct net_device *dev; + struct net_device *lower_dev; +}; + +static int switchdev_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + struct switchdev_nested_priv *switchdev_priv = priv->data; + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev); + bool (*check_cb)(const struct net_device *dev); + const struct net_device *dev; + + check_cb = switchdev_priv->check_cb; + foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb; + dev = switchdev_priv->dev; + + if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) { + switchdev_priv->lower_dev = lower_dev; + return 1; + } + + return 0; +} + +static struct net_device * +switchdev_lower_dev_find(struct net_device *dev, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev)) +{ + struct switchdev_nested_priv switchdev_priv = { + .check_cb = check_cb, + .foreign_dev_check_cb = foreign_dev_check_cb, + .dev = dev, + .lower_dev = NULL, + }; + struct netdev_nested_priv priv = { + .data = &switchdev_priv, + }; + + netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv); + + return switchdev_priv.lower_dev; +} + +static int __switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *br, *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) + return add_cb(dev, orig_dev, info->ctx, fdb_info); + + if (netif_is_lag_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + goto maybe_bridged_with_us; + + /* This is a LAG interface that we offload */ + if (!lag_add_cb) + return -EOPNOTSUPP; + + return lag_add_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. + */ + if (netif_is_bridge_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + return 0; + + /* This is a bridge interface that we offload */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + /* Bridge ports might be either us, or LAG interfaces + * that we offload. 
+ */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; + } + +maybe_bridged_with_us: + /* Event is neither on a bridge nor a LAG. Check whether it is on an + * interface that is in a bridge with us. + */ + br = netdev_master_upper_dev_get_rcu(dev); + if (!br || !netif_is_bridge_master(br)) + return 0; + + if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + return 0; + + return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + add_cb, lag_add_cb); +} + +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device); + +static int __switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *br, *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) + return del_cb(dev, orig_dev, info->ctx, fdb_info); + + if (netif_is_lag_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + goto maybe_bridged_with_us; + + /* This is a LAG interface that we offload */ + if (!lag_del_cb) + return -EOPNOTSUPP; + + return lag_del_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. + */ + if (netif_is_bridge_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + return 0; + + /* This is a bridge interface that we offload */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + /* Bridge ports might be either us, or LAG interfaces + * that we offload. 
+ */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; + } + +maybe_bridged_with_us: + /* Event is neither on a bridge nor a LAG. Check whether it is on an + * interface that is in a bridge with us. + */ + br = netdev_master_upper_dev_get_rcu(dev); + if (!br || !netif_is_bridge_master(br)) + return 0; + + if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + return 0; + + return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + del_cb, lag_del_cb); +} + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device); + static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), @@ -549,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set); + +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .dev = dev, + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + .tx_fwd_offload = tx_fwd_offload, + }, + }; + int err; + + ASSERT_RTNL(); + + err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED, + brport_dev, &brport_info.info, + extack); + return notifier_to_errno(err); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); + +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + }, + }; + + ASSERT_RTNL(); + + call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED, + brport_dev, &brport_info.info, + NULL); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75b99b7eda22..b15b2b1b2f38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1886,6 +1886,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; 
@@ -1909,6 +1910,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1928,18 +1930,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc = 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1962,6 +1979,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } + if (skb_cb->bytes_read) + goto exit; + tsk_advance_rx_queue(sk); if (likely(!connected)) diff --git a/net/unix/Kconfig b/net/unix/Kconfig index b6c4282899ec..b7f811216820 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -25,6 +25,11 @@ config UNIX_SCM depends on UNIX default y +config AF_UNIX_OOB + bool + depends on UNIX + default y + config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX diff --git a/net/unix/Makefile b/net/unix/Makefile index 54e58cc4f945..20491825b4d0 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o unix-y := af_unix.o garbage.o unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o +unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ba7ced947e51..ec02e70a549b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -494,6 +494,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) sk_error_report(other); } } + sk->sk_state = other->sk_state = TCP_CLOSE; } static void unix_sock_destructor(struct sock *sk) @@ -502,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -669,6 +676,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -746,6 +755,7 @@ static const struct proto_ops unix_dgram_ops = { .listen = sock_no_listen, .shutdown 
= unix_shutdown, .sendmsg = unix_dgram_sendmsg, + .read_sock = unix_read_sock, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, @@ -777,10 +787,21 @@ static const struct proto_ops unix_seqpacket_ops = { .show_fdinfo = unix_show_fdinfo, }; -static struct proto unix_proto = { +static void unix_close(struct sock *sk, long timeout) +{ + /* Nothing to do here, unix socket does not need a ->close(). + * This is merely for sockmap. + */ +} + +struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), + .close = unix_close, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = unix_bpf_update_proto, +#endif }; static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) @@ -864,6 +885,7 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + sk->sk_prot->close(sk, 0); unix_release_sock(sk, 0); sock->sk = NULL; @@ -1199,6 +1221,9 @@ restart: unix_peer(sk) = other; unix_state_double_unlock(sk, other); } + + if (unix_peer(sk)) + sk->sk_state = other->sk_state = TCP_ESTABLISHED; return 0; out_unlock: @@ -1431,12 +1456,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) init_peercred(ska); init_peercred(skb); - if (ska->sk_type != SOCK_DGRAM) { - ska->sk_state = TCP_ESTABLISHED; - skb->sk_state = TCP_ESTABLISHED; - socka->state = SS_CONNECTED; - sockb->state = SS_CONNECTED; - } + ska->sk_state = TCP_ESTABLISHED; + skb->sk_state = TCP_ESTABLISHED; + socka->state = SS_CONNECTED; + sockb->state = SS_CONNECTED; return 0; } @@ -1872,6 +1895,46 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +{ + struct unix_sock *ousk = unix_sk(other); + struct sk_buff *skb; + int err = 0; + + skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + + if (!skb) + return err; + + skb_put(skb, 1); + skb->len = 1; + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); + + if (err) { + kfree_skb(skb); + return err; + } + + unix_state_lock(other); + maybe_add_creds(skb, sock, other); + skb_get(skb); + + if (ousk->oob_skb) + kfree_skb(ousk->oob_skb); + + ousk->oob_skb = skb; + + scm_stat_add(other, skb); + skb_queue_tail(&other->sk_receive_queue, skb); + sk_send_sigurg(other); + unix_state_unlock(other); + other->sk_data_ready(other); + + return err; +} +#endif + static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1890,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return err; err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) - goto out_err; + if (msg->msg_flags & MSG_OOB) { +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (len) + len--; + else +#endif + goto out_err; + } if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; @@ -1956,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (msg->msg_flags & MSG_OOB) { + err = queue_oob(sock, msg, other); + if (err) + goto out_err; + sent++; + } +#endif + scm_destroy(&scm); return sent; @@ -2128,11 +2206,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags) { struct scm_cookie scm; - struct sock *sk = sock->sk; + struct socket *sock = sk->sk_socket; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; @@ -2235,6 +2313,53 @@ out: return err; } +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + +#ifdef CONFIG_BPF_SYSCALL + if (sk->sk_prot != &unix_proto) + return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, NULL); +#endif + return __unix_dgram_recvmsg(sk, msg, size, flags); +} + +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + int copied = 0; + + while (1) { + struct unix_sock *u = unix_sk(sk); + struct sk_buff *skb; + int used, err; + + mutex_lock(&u->iolock); + skb = skb_recv_datagram(sk, 0, 1, &err); + mutex_unlock(&u->iolock); + if (!skb) + return err; + + used = recv_actor(desc, skb, 0, skb->len); + if (used <= 0) { + if (!copied) + copied = used; + kfree_skb(skb); + break; + } else if (used <= skb->len) { + copied += used; + } + + kfree_skb(skb); + if (!desc->count) + break; + } + + return copied; +} + /* * Sleep until more data has arrived. But check for races.. 
*/ @@ -2294,6 +2419,59 @@ struct unix_stream_read_state { unsigned int splice_flags; }; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +static int unix_stream_recv_urg(struct unix_stream_read_state *state) +{ + struct socket *sock = state->socket; + struct sock *sk = sock->sk; + struct unix_sock *u = unix_sk(sk); + int chunk = 1; + + if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) + return -EINVAL; + + chunk = state->recv_actor(u->oob_skb, 0, chunk, state); + if (chunk < 0) + return -EFAULT; + + if (!(state->flags & MSG_PEEK)) { + UNIXCB(u->oob_skb).consumed += 1; + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } + state->msg->msg_flags |= MSG_OOB; + return 1; +} + +static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + int flags, int copied) +{ + struct unix_sock *u = unix_sk(sk); + + if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = NULL; + } else { + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; + } else if (sock_flag(sk, SOCK_URGINLINE)) { + if (!(flags & MSG_PEEK)) { + u->oob_skb = NULL; + consume_skb(skb); + } + } else if (!(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = skb_peek(&sk->sk_receive_queue); + } + } + } + return skb; +} +#endif + static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { @@ -2319,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + mutex_lock(&u->iolock); + unix_state_lock(sk); + + err = unix_stream_recv_urg(state); + + unix_state_unlock(sk); + mutex_unlock(&u->iolock); +#endif goto out; } @@ -2347,6 +2534,18 @@ redo: } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? 
last->len : 0; + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); + if (!skb) { + unix_state_unlock(sk); + if (copied) + break; + goto redo; + } + } +#endif again: if (skb == NULL) { if (copied >= target) @@ -2682,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCUNIXFILE: err = unix_open_file(sk); break; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + case SIOCATMARK: + { + struct sk_buff *skb; + struct unix_sock *u = unix_sk(sk); + int answ = 0; + + skb = skb_peek(&sk->sk_receive_queue); + if (skb && skb == u->oob_skb) + answ = 1; + err = put_user(answ, (int __user *)arg); + } + break; +#endif default: err = -ENOIOCTLCMD; break; @@ -2972,6 +3185,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); register_pernet_subsys(&unix_net_ops); + unix_bpf_build_proto(); out: return rc; } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c new file mode 100644 index 000000000000..177e883f451e --- /dev/null +++ b/net/unix/unix_bpf.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */ + +#include <linux/skmsg.h> +#include <linux/bpf.h> +#include <net/sock.h> +#include <net/af_unix.h> + +#define unix_sk_has_data(__sk, __psock) \ + ({ !skb_queue_empty(&__sk->sk_receive_queue) || \ + !skb_queue_empty(&__psock->ingress_skb) || \ + !list_empty(&__psock->ingress_msg); \ + }) + +static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock, + long timeo) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct unix_sock *u = unix_sk(sk); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + if (!unix_sk_has_data(sk, psock)) { + mutex_unlock(&u->iolock); + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + mutex_lock(&u->iolock); + ret = unix_sk_has_data(sk, psock); + } + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, + int *addr_len) +{ + struct unix_sock *u = unix_sk(sk); + struct sk_psock *psock; + int copied; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return __unix_dgram_recvmsg(sk, msg, len, flags); + + mutex_lock(&u->iolock); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) { + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return __unix_dgram_recvmsg(sk, msg, len, flags); + } + +msg_bytes_ready: + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + long timeo; + int data; + + timeo = sock_rcvtimeo(sk, nonblock); + data = unix_msg_wait_data(sk, psock, timeo); + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return __unix_dgram_recvmsg(sk, msg, len, flags); + } + copied = -EAGAIN; + } + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return copied; +} + +static struct proto *unix_prot_saved __read_mostly; +static DEFINE_SPINLOCK(unix_prot_lock); +static struct proto unix_bpf_prot; + +static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->close = sock_map_close; + prot->recvmsg = unix_dgram_bpf_recvmsg; +} + +static void unix_bpf_check_needs_rebuild(struct proto *ops) +{ + if (unlikely(ops != 
smp_load_acquire(&unix_prot_saved))) { + spin_lock_bh(&unix_prot_lock); + if (likely(ops != unix_prot_saved)) { + unix_bpf_rebuild_protos(&unix_bpf_prot, ops); + smp_store_release(&unix_prot_saved, ops); + } + spin_unlock_bh(&unix_prot_lock); + } +} + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + if (restore) { + sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + return 0; + } + + unix_bpf_check_needs_rebuild(psock->sk_proto); + WRITE_ONCE(sk->sk_prot, &unix_bpf_prot); + return 0; +} + +void __init unix_bpf_build_proto(void) +{ + unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto); +} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 16c88beea48b..dceed5b5b226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6527,8 +6527,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) err = rdev_change_station(rdev, dev, mac_addr, &params); out_put_vlan: - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -6763,8 +6762,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) err = rdev_add_station(rdev, dev, mac_addr, &params); - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -8489,8 +8487,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_free; nl80211_send_scan_start(rdev, wdev); - if (wdev->netdev) - dev_hold(wdev->netdev); + dev_hold(wdev->netdev); return 0; @@ -14860,9 +14857,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, return -ENETDOWN; } - if (dev) - dev_hold(dev); - + dev_hold(dev); info->user_ptr[0] = rdev; } @@ -14884,8 +14879,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7897b1478c3c..11c68b159324 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, } #endif - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); kfree(rdev->int_scan_req); rdev->int_scan_req = NULL; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0b9548ea8477..fcba217f0ae2 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -45,11 +45,13 @@ xdp_monitor xdp_redirect xdp_redirect_cpu xdp_redirect_map +xdp_redirect_map_multi xdp_router_ipv4 xdp_rxq_info xdp_sample_pkts xdp_tx_iptunnel xdpsock +xdpsock_ctrl_proc xsk_fwd testfile.img hbm_out.log diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh index e68b9ee6814b..35db26f736b9 100755 --- a/samples/bpf/test_override_return.sh +++ b/samples/bpf/test_override_return.sh @@ -1,5 +1,6 @@ #!/bin/bash +rm -r tmpmnt rm -f testfile.img dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 DEVICE=$(losetup --show -f testfile.img) diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c index fdcd6580dd73..8be7ce18d3ba 100644 --- a/samples/bpf/tracex7_user.c +++ b/samples/bpf/tracex7_user.c @@ -14,6 +14,11 @@ int main(int argc, char **argv) int ret = 0; FILE *f; + if (!argv[1]) { + fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); + return 1; /* exit non-zero on missing argument */ + } + snprintf(filename,
sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 576411612523..d3ecdc18b9c1 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -792,13 +792,23 @@ int main(int argc, char **argv) n_cpus = get_nprocs_conf(); - /* Notice: choosing he queue size is very important with the - * ixgbe driver, because it's driver page recycling trick is - * dependend on pages being returned quickly. The number of - * out-standing packets in the system must be less-than 2x - * RX-ring size. + /* Notice: Choosing the queue size is very important when the CPU is + * configured with power-saving states. + * + * If the deepest sleep state takes 133 usec to wake up from and the + * link speed is 10 Gbit/s ((10*10^9/8) bytes/sec), the number of bytes + * that can arrive within those 133 usec is (10*10^9/8)*(133/10^6) = + * 166250 bytes. With MTU-size packets this is 110 packets; with + * minimum-size Ethernet frames (84 bytes incl. MAC preamble and + * interframe gap) it is 1979 packets. + * + * Set the default cpumap queue to 2048: the small-packet worst case of + * 1979 plus up to 64 packets buffered until the kthread wakeup call + * (xdp_do_flush) gives a worst case of 2043 packets. + * + * A sysadmin can configure the system to avoid deep sleep states via: + * tuned-adm profile network-latency */ - qsize = 128+64; + qsize = 2048; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 2d94025b38e9..00ac7b79cddb 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -547,6 +547,7 @@ class PrinterHelpers(Printer): 'struct inode', 'struct socket', 'struct file', + 'struct bpf_timer', ] known_types = { '...', @@ -594,6 +595,7 @@ class PrinterHelpers(Printer): 'struct inode', 'struct socket', 'struct file', + 'struct bpf_timer', } mapped_types = { 'u8': '__u8', diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b0032c42333e..2143f590e3d6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1330,7 +1330,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_SMC_SOCKET; case PF_XDP: return SECCLASS_XDP_SOCKET; -#if PF_MAX > 45 + case PF_MCTP: + return SECCLASS_MCTP_SOCKET; +#if PF_MAX > 46 #error New address family defined, please update this function. #endif } diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 62d19bccf3de..084757ff4390 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -246,6 +246,8 @@ struct security_class_mapping secclass_map[] = { NULL } }, { "xdp_socket", { COMMON_SOCK_PERMS, NULL } }, + { "mctp_socket", + { COMMON_SOCK_PERMS, NULL } }, { "perf_event", { "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } }, { "lockdown", @@ -255,6 +257,6 @@ struct security_class_mapping secclass_map[] = { { NULL } }; -#if PF_MAX > 45 +#if PF_MAX > 46 #error New address family defined, please update secclass_map.
#endif diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index ff4d327a582e..88b28aa7431f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | + { **-B** | **--base-btf** } } *COMMANDS* := { **dump** | **help** } @@ -73,6 +74,20 @@ OPTIONS ======= .. include:: common_options.rst + -B, --base-btf *FILE* + Pass a base BTF object. Base BTF objects are typically used + with BTF objects for kernel modules. To avoid duplicating + all kernel symbols required by modules, BTF objects for + modules are "split", they are built incrementally on top of + the kernel (vmlinux) BTF object. So the base BTF reference + should usually point to the kernel BTF. + + When the main BTF object to process (for example, the + module BTF to dump) is passed as a *FILE*, bpftool attempts + to autodetect the path for the base object, and passing + this option is optional. When the main BTF object is passed + through other handles, this option becomes necessary. + EXAMPLES ======== **# bpftool btf dump id 1226** @@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported: **# bpftool btf dump prog tag b88e0a09b1d9759d** **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** + +| +| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus** +| (or) +| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')** +| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux** + +:: + + [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2 + 'alert' type_id=393 bits_offset=0 + 'ara' type_id=56050 bits_offset=256 + [104849] STRUCT 'alert_data' size=12 vlen=3 + 'addr' type_id=16 bits_offset=0 + 'type' type_id=56053 bits_offset=32 + 'data' type_id=7 bits_offset=64 + [104850] PTR '(anon)' type_id=104848 + [104851] PTR '(anon)' type_id=104849 + [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static + [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static + [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1 + 'ara' type_id=56050 + [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static + [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static + [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2 + 'ara' type_id=56050 + 'id' type_id=56056 + [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static + [104859] FUNC 'smbalert_work' type_id=9695 linkage=static + [104860] FUNC 'smbus_alert' type_id=71367 linkage=static + [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index baee8591ac76..3e4395eede4f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst 
b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index dd3771bdbc57..ab9f57ee4c3a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **probe** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 7cd6681137f3..2ef2f2df0279 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-L** | **--use-loader** } } *COMMAND* := { **object** | **skeleton** | **help** } @@ -152,6 +153,12 @@ OPTIONS ======= .. include:: common_options.rst + -L, --use-loader + For skeletons, generate a "light" skeleton (also known as "loader" + skeleton). A light skeleton contains a loader eBPF program. It does + not use the majority of the libbpf infrastructure, and does not need + libelf. + EXAMPLES ======== **$ cat example1.bpf.c** diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 51f49bead619..471f363a725a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -12,6 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **iter** *COMMAND* + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *COMMANDS* := { **pin** | **help** } ITER COMMANDS diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 5f7db2a837cc..0de90f086238 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3d52256ba75f..d0c4abe08aba 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index d8165d530937..1ae0375e8fea 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] 
**net** *COMMAND* - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index e958ce91de72..ce52798a917d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index fe1b38e7e887..91608cb7e44a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -12,7 +12,9 @@ SYNOPSIS **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | + { **-L** | **--use-loader** } } *COMMANDS* := { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** @@ -48,10 +50,11 @@ PROG COMMANDS | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { -| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** +| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** | } | *METRICs* := { -| **cycles** | **instructions** | **l1d_loads** | **llc_misses** +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** | +| **itlb_misses** | **dtlb_misses** | } @@ -223,6 +226,20 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. + -L, --use-loader + Load program as a "loader" program. This is useful to debug + the generation of such programs. When this option is in + use, bpftool attempts to load the programs from the object + file into the kernel, but does not pin them (therefore, the + *PATH* must not be provided). + + When combined with the **-d**\ \|\ **--debug** option, + additional debug messages are generated, and the execution + of the loader program will use the **bpf_trace_printk**\ () + helper to log each step of loading BTF, creating the maps, + and loading the programs (see **bpftool prog tracelog** as + a way to dump those messages). + EXAMPLES ======== **# bpftool prog show** @@ -326,3 +343,16 @@ EXAMPLES 40176203 cycles (83.05%) 42518139 instructions # 1.06 insns per cycle (83.39%) 123 llc_misses # 2.89 LLC misses per million insns (83.15%) + +| +| Output below is for the trace logs. +| Run in separate terminals: +| **# bpftool prog tracelog** +| **# bpftool prog load -L -d file.o** + +:: + + bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5 + bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6 + bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7 + bpftool-620059 [004] d... 
2634685.517999: bpf_trace_printk: close(5) = 0 diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index 506e70ee78e9..02afc0fc14cb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index e7d949334961..bb23f55bb05a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -18,15 +18,15 @@ SYNOPSIS *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } - *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } - | { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-V** | **--version** } | + { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *MAP-COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** - | **delete** | **pin** | **event_pipe** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | + **delete** | **pin** | **event_pipe** | **help** } - *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** - | **load** | **attach** | **detach** | **help** } + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | + **load** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index cc33c5824a2f..88e2bcf16cca 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -260,7 +260,8 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then - local c='--version --json --pretty --bpffs --mapcompat --debug' + local c='--version --json --pretty --bpffs --mapcompat --debug \ + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -278,7 +279,7 @@ _bpftool() _sysfs_get_netdevs return 0 ;; - file|pinned) + file|pinned|-B|--base-btf) _filedir return 0 ;; @@ -291,7 +292,8 @@ _bpftool() # Remove all options so completions don't have to deal with them. 
local i for (( i=1; i < ${#words[@]}; )); do - if [[ ${words[i]::1} == - ]]; then + if [[ ${words[i]::1} == - ]] && + [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then words=( "${words[@]:0:i}" "${words[@]:i+1}" ) [[ $i -le $cword ]] && cword=$(( cword - 1 )) else @@ -343,7 +345,8 @@ _bpftool() local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' - local METRIC_TYPE='cycles instructions l1d_loads llc_misses' + local METRIC_TYPE='cycles instructions l1d_loads llc_misses \ + itlb_misses dtlb_misses' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -404,8 +407,10 @@ _bpftool() return 0 ;; 5) - COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \ - stream_parser flow_dissector' -- "$cur" ) ) + local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ + skb_verdict stream_verdict stream_parser \ + flow_dissector' + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) return 0 ;; 6) @@ -464,7 +469,7 @@ _bpftool() case $prev in type) - COMPREPLY=( $( compgen -W "socket kprobe \ + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \ kretprobe classifier flow_dissector \ action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ @@ -479,8 +484,8 @@ _bpftool() cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ - fentry fexit freplace sk_lookup" -- \ - "$cur" ) ) + fentry fexit freplace sk_lookup' + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) ) return 0 ;; id) @@ -698,15 +703,15 @@ _bpftool() return 0 ;; type) - COMPREPLY=( $( compgen -W 'hash array prog_array \ - perf_event_array percpu_hash percpu_array \ - stack_trace cgroup_array lru_hash \ + local BPFTOOL_MAP_CREATE_TYPES='hash array \ + prog_array perf_event_array percpu_hash \ + percpu_array stack_trace cgroup_array lru_hash \ lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage task_storage' -- \ - "$cur" ) ) + struct_ops inode_storage task_storage ringbuf' + COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; key|value|flags|entries) @@ -1017,34 +1022,37 @@ _bpftool() return 0 ;; attach|detach) - local ATTACH_TYPES='ingress egress sock_create sock_ops \ - device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ + local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ + sock_create sock_ops device \ + bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' - case $prev in - $command) - _filedir - return 0 - ;; - ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ - post_bind4|post_bind6|connect4|connect6|getpeername4|\ - getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) + # Check for $prev = $command first + if [ $prev = $command ]; then + _filedir + return 0 + # Then check for attach type. This is done outside of the + # "case $prev in" to avoid writing the whole list of attach + # types again as pattern to match (where we cannot reuse + # our variable). 
+ elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 - ;; + fi + # case/esac for the other cases + case $prev in id) _bpftool_get_prog_ids return 0 ;; *) - if ! _bpftool_search_list "$ATTACH_TYPES"; then - COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ - "$cur" ) ) + if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then + COMPREPLY=( $( compgen -W \ + "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) ) elif [[ "$command" == "attach" ]]; then # We have an attach type on the command line, # but it is not the previous word, or diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 385d5c955cf3..f7e5ff3586c9 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv) } if (!btf) { - err = btf__get_from_id(btf_id, &btf); + btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); + err = libbpf_get_error(btf); if (err) { p_err("get btf by id (%u): %s", btf_id, strerror(err)); goto done; } - if (!btf) { - err = -ENOENT; - p_err("can't find btf with ID (%u)", btf_id); - goto done; - } } if (dump_c) { @@ -985,7 +981,8 @@ static int do_help(int argc, char **argv) " FORMAT := { raw | c }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-B|--base-btf} }\n" "", bin_name, "btf"); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 7ca54d046362..9c25286a5c73 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, } info = &prog_info->info; - if (!info->btf_id || !info->nr_func_info || - btf__get_from_id(info->btf_id, &prog_btf)) + if (!info->btf_id || !info->nr_func_info) + goto print; + prog_btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(prog_btf)) goto print; finfo = u64_to_ptr(info->func_info); func_type = btf__type_by_id(prog_btf, finfo->type_id); diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 6e53b1d393f4..3571a281c43f 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -501,7 +501,8 @@ static int do_help(int argc, char **argv) HELP_SPEC_ATTACH_TYPES "\n" " " HELP_SPEC_ATTACH_FLAGS "\n" " " HELP_SPEC_PROGRAM "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index dc6daa193557..d42d930a3ec4 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_MODIFY_RETURN] = "mod_ret", [BPF_LSM_MAC] = "lsm_mac", [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", }; void p_err(const char *fmt, ...) 
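The bpftool conversions above all follow the same pattern: the deprecated btf__get_from_id(), which reported errors through its return code and handed the object back through an out parameter, is replaced by btf__load_from_kernel_by_id(), which returns the object directly and reports errors via libbpf_get_error(). A minimal caller sketch, not part of the patch (the function name dump_btf_types is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    /* Load the kernel BTF object with the given id and print its type count. */
    static int dump_btf_types(__u32 btf_id)
    {
            struct btf *btf;
            int err;

            /* New-style API: pointer return, error via libbpf_get_error() */
            btf = btf__load_from_kernel_by_id(btf_id);
            err = libbpf_get_error(btf);
            if (err) {
                    fprintf(stderr, "get btf by id (%u): %s\n",
                            btf_id, strerror(-err));
                    return err;
            }

            printf("btf %u: %u types\n", btf_id, btf__get_nr_types(btf));
            btf__free(btf);
            return 0;
    }

To dump a kernel module's split BTF instead, the same pattern applies with btf__load_from_kernel_by_id_split(id, vmlinux_btf), as the do_dump() change below this point shows.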
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 40a88df275f9..7f36385aa9e2 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1d71ff8c52fa..d40d92bbf0e4 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n" " %1$s %2$s help\n" "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-L|--use-loader} }\n" "", bin_name, "gen"); diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 3b1aad7535dd..84a9b01d956d 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -97,7 +97,9 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n" " %1$s %2$s help\n" + "\n" " " HELP_SPEC_MAP "\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, "iter"); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index e77e1525d20a..8cc3e36f8cc6 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -401,7 +401,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " " HELP_SPEC_LINK "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 3ddfd4843738..02eaaf065f65 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -64,7 +64,8 @@ static int do_help(int argc, char **argv) " %s version\n" "\n" " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-V|--version} }\n" "", bin_name, bin_name, bin_name); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c1cf29798b99..90caa42aac4c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ - "\t {-m|--mapcompat} | {-n|--nomount} }" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 09ae0381205b..407071d54ab1 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) } else if (info->btf_value_type_id) { int err; - err = btf__get_from_id(info->btf_id, &btf); - if (err || !btf) { + btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(btf); + if (err) { p_err("failed to get btf"); - btf = err ? 
ERR_PTR(err) : ERR_PTR(-ESRCH); + btf = ERR_PTR(err); } } @@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key, void *value) { json_writer_t *btf_wtr; - struct btf *btf = NULL; - int err; + struct btf *btf; - err = btf__get_from_id(info->btf_id, &btf); - if (err) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { p_err("failed to get btf"); return; } @@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage }\n" - " " HELP_SPEC_OPTIONS "\n" + " task_storage }\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index f836d115d7d6..649053704bd7 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -729,6 +729,7 @@ static int do_help(int argc, char **argv) "\n" " " HELP_SPEC_PROGRAM "\n" " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" + " " HELP_SPEC_OPTIONS " }\n" "\n" "Note: Only xdp and tc attachments are supported now.\n" " For progs attached to cgroups, use \"bpftool cgroup\"\n" diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index ad23934819c7..50de087b0db7 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -231,7 +231,10 @@ static int do_show(int argc, char **argv) static int do_help(int argc, char **argv) { fprintf(stderr, - "Usage: %1$s %2$s { show | list | help }\n" + "Usage: %1$s %2$s { show | list }\n" + " %1$s %2$s help }\n" + "\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cc48726740ad..9c3e343b7d87 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps) struct bpf_map_info map_info; struct btf_var_secinfo *vsi; bool printed_header = false; - struct btf *btf = NULL; unsigned int i, vlen; void *value = NULL; const char *name; + struct btf *btf; int err; if (!num_maps) @@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!value) return; - err = btf__get_from_id(map_info.btf_id, &btf); - if (err || !btf) + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (libbpf_get_error(btf)) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, member_len = info->xlated_prog_len; } - if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { - p_err("failed to get btf"); - return -1; + if (info->btf_id) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to get btf"); + return -1; + } } func_info = u64_to_ptr(info->func_info); @@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, kernel_syms_destroy(&dd); } + btf__free(btf); + return 0; } @@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd) return NULL; } - if 
(info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to load btf for prog FD %d", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { @@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } @@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv) " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" - " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" - " flow_dissector }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" + " stream_parser | flow_dissector }\n" " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" + " {-L|--use-loader} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index b58b91f62ffb..ab2d2290569a 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -572,8 +572,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s unregister STRUCT_OPS_MAP\n" " %1$s %2$s help\n" "\n" - " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n" " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 3ad9301b0f00..de6365b53c9c 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) sh->sh_addralign = expected; if (gelf_update_shdr(scn, sh) == 0) { - printf("FAILED cannot update section header: %s\n", + pr_err("FAILED cannot update section header: %s\n", elf_errmsg(-1)); return -1; } @@ -317,6 +317,7 @@ static int elf_collect(struct object *obj) elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL); if (!elf) { + close(fd); pr_err("FAILED cannot create ELF descriptor: %s\n", elf_errmsg(-1)); return -1; @@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj) err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", - obj->path, strerror(-err)); + obj->btf ?: obj->path, strerror(-err)); return -1; } @@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id) int i; if (!id->id) { - pr_err("FAILED unresolved symbol %s\n", id->name); - return -EINVAL; + pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); } for (i = 0; i < id->addr_cnt; i++) { @@ -734,8 +734,9 @@ int main(int argc, const char **argv) err = 0; out: - if (obj.efile.elf) + if (obj.efile.elf) { elf_end(obj.efile.elf); - close(obj.efile.fd); + close(obj.efile.fd); + } return err; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf9252c7381e..2db6925e04f4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** 
* *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -3249,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4780,6 +4777,76 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. 
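Taken together, bpf_timer_init(), bpf_timer_set_callback() and bpf_timer_start() are called in that order from BPF program context, with the struct bpf_timer embedded in a map value as the verifier requires. A minimal BPF-side sketch, not part of the patch (map name, section name, and clockid value are illustrative; assumes helper definitions regenerated from this header):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct map_elem {
            struct bpf_timer timer;         /* must live inside a map value */
    };

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, int);
            __type(value, struct map_elem);
    } timer_map SEC(".maps");

    /* Invoked in soft irq context when the timer expires. */
    static int timer_cb(void *map, int *key, struct map_elem *val)
    {
            bpf_printk("timer fired");
            return 0;
    }

    SEC("tp/syscalls/sys_enter_nanosleep")
    int arm_timer(void *ctx)
    {
            int key = 0;
            struct map_elem *val;

            val = bpf_map_lookup_elem(&timer_map, &key);
            if (!val)
                    return 0;
            /* clockid lives in the low 4 bits of flags; 1 == CLOCK_MONOTONIC */
            bpf_timer_init(&val->timer, &timer_map, 1);
            bpf_timer_set_callback(&val->timer, timer_cb);
            bpf_timer_start(&val->timer, 1000000000ULL /* 1 sec */, 0);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";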
+ * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4951,6 +5018,11 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6077,6 +6149,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697f..eb15f319aa57 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -653,6 +653,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 430f6874fa41..94f0a146bb7b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b46760b93bb4..85de4fd50699 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1180,7 +1180,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load(struct btf *btf) +int btf__load_into_kernel(struct btf *btf) { __u32 log_buf_size = 0, raw_size; char *log_buf = NULL; @@ -1228,6 +1228,7 @@ done: free(log_buf); return libbpf_err(err); } +int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) { @@ -1382,21 +1383,35 @@ exit_free: return btf; } -int btf__get_from_id(__u32 id, struct btf **btf) +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) { - struct btf *res; - int err, btf_fd; + struct btf *btf; + int btf_fd; - *btf = NULL; btf_fd = bpf_btf_get_fd_by_id(id); if (btf_fd < 0) - return libbpf_err(-errno); - - res = btf_get_from_fd(btf_fd, NULL); - err = libbpf_get_error(res); + return libbpf_err_ptr(-errno); + btf = btf_get_from_fd(btf_fd, base_btf); close(btf_fd); + return libbpf_ptr(btf); +} + +struct btf *btf__load_from_kernel_by_id(__u32 id) +{ + return btf__load_from_kernel_by_id_split(id, NULL); +} + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct btf *res; + int err; + + *btf = NULL; + res = btf__load_from_kernel_by_id(id); + err = libbpf_get_error(res); + if (err) return libbpf_err(err); @@ -4021,7 +4036,7 @@ static void 
btf_dedup_merge_hypot_map(struct btf_dedup *d) */ if (d->hypot_adjust_canon) continue; - + if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) d->map[t_id] = c_id; @@ -4394,7 +4409,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d) * Probe few well-known locations for vmlinux kernel image and try to load BTF * data out of it to use for target BTF. */ -struct btf *libbpf_find_kernel_btf(void) +struct btf *btf__load_vmlinux_btf(void) { struct { const char *path_fmt; @@ -4440,6 +4455,16 @@ struct btf *libbpf_find_kernel_btf(void) return libbpf_err_ptr(-ESRCH); } +struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf"))); + +struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf) +{ + char path[80]; + + snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name); + return btf__parse_split(path, vmlinux_btf); +} + int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) { int i, n, err; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b54f1c3ebd57..4a711f990904 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__parse_raw(const char *path); LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); +LIBBPF_API struct btf *btf__load_vmlinux_btf(void); +LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + +LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); +LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); + LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); +LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, @@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, @@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); - LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, @@ -184,6 +190,25 @@ LIBBPF_API int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, const struct btf_dump_emit_type_decl_opts *opts); + +struct btf_dump_type_data_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + const char *indent_str; + int indent_level; + /* below match "show" flags for bpf_show_snprintf() */ + bool compact; /* no newlines/indentation */ + bool skip_names; /* skip member/type names 
*/ + bool emit_zeroes; /* show 0-valued fields */ + size_t :0; +}; +#define btf_dump_type_data_opts__last_field emit_zeroes + +LIBBPF_API int +btf_dump__dump_type_data(struct btf_dump *d, __u32 id, + const void *data, size_t data_sz, + const struct btf_dump_type_data_opts *opts); + /* * A set of helpers for easier BTF types handling */ diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5dc6b5172bb3..e4b483f15fb9 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -10,6 +10,8 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <ctype.h> +#include <endian.h> #include <errno.h> #include <linux/err.h> #include <linux/btf.h> @@ -53,6 +55,26 @@ struct btf_dump_type_aux_state { __u8 referenced: 1; }; +/* indent string length; one indent string is added for each indent level */ +#define BTF_DATA_INDENT_STR_LEN 32 + +/* + * Common internal data for BTF type data dump operations. + */ +struct btf_dump_data { + const void *data_end; /* end of valid data to show */ + bool compact; + bool skip_names; + bool emit_zeroes; + __u8 indent_lvl; /* base indent level */ + char indent_str[BTF_DATA_INDENT_STR_LEN]; + /* below are used during iteration */ + int depth; + bool is_array_member; + bool is_array_terminated; + bool is_array_char; +}; + struct btf_dump { const struct btf *btf; const struct btf_ext *btf_ext; @@ -60,6 +82,7 @@ struct btf_dump { struct btf_dump_opts opts; int ptr_sz; bool strip_mods; + bool skip_anon_defs; int last_id; /* per-type auxiliary state */ @@ -89,6 +112,10 @@ struct btf_dump { * name occurrences */ struct hashmap *ident_names; + /* + * data for typed display; allocated if needed. + */ + struct btf_dump_data *typed_dump; }; static size_t str_hash_fn(const void *key, void *ctx) @@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) break; case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = btf_params(t); - __u16 vlen = btf_vlen(t); + __u16 n = btf_vlen(t); int i; btf_dump_emit_type(d, t->type, cont_id); - for (i = 0; i < vlen; i++, p++) + for (i = 0; i < n; i++, p++) btf_dump_emit_type(d, p->type, cont_id); break; @@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, const struct btf_type *t) { - btf_dump_printf(d, "%s %s", + btf_dump_printf(d, "%s%s%s", btf_is_struct(t) ? "struct" : "union", + t->name_off ? 
" " : "", btf_dump_type_name(d, id)); } @@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_UNION: btf_dump_emit_mods(d, decls); /* inline anonymous struct/union */ - if (t->name_off == 0) + if (t->name_off == 0 && !d->skip_anon_defs) btf_dump_emit_struct_def(d, id, t, lvl); else btf_dump_emit_struct_fwd(d, id, t); @@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_ENUM: btf_dump_emit_mods(d, decls); /* inline anonymous enum */ - if (t->name_off == 0) + if (t->name_off == 0 && !d->skip_anon_defs) btf_dump_emit_enum_def(d, id, t, lvl); else btf_dump_emit_enum_fwd(d, id, t); @@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_emit_name(d, fname, last_was_ptr); } +/* show type name as (type_name) */ +static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, + bool top_level) +{ + const struct btf_type *t; + + /* for array members, we don't bother emitting type name for each + * member to avoid the redundancy of + * .name = (char[4])[(char)'f',(char)'o',(char)'o',] + */ + if (d->typed_dump->is_array_member) + return; + + /* avoid type name specification for variable/section; it will be done + * for the associated variable value(s). + */ + t = btf__type_by_id(d->btf, id); + if (btf_is_var(t) || btf_is_datasec(t)) + return; + + if (top_level) + btf_dump_printf(d, "("); + + d->skip_anon_defs = true; + d->strip_mods = true; + btf_dump_emit_type_decl(d, id, "", 0); + d->strip_mods = false; + d->skip_anon_defs = false; + + if (top_level) + btf_dump_printf(d, ")"); +} + /* return number of duplicates (occurrences) of a given name */ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) @@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) { return btf_dump_resolve_name(d, id, d->ident_names); } + +static int btf_dump_dump_type_data(struct btf_dump *d, + const char *fname, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset, + __u8 bit_sz); + +static const char *btf_dump_data_newline(struct btf_dump *d) +{ + return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n"; +} + +static const char *btf_dump_data_delim(struct btf_dump *d) +{ + return d->typed_dump->depth == 0 ? "" : ","; +} + +static void btf_dump_data_pfx(struct btf_dump *d) +{ + int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth; + + if (d->typed_dump->compact) + return; + + for (i = 0; i < lvl; i++) + btf_dump_printf(d, "%s", d->typed_dump->indent_str); +} + +/* A macro is used here as btf_type_value[s]() appends format specifiers + * to the format specifier passed in; these do the work of appending + * delimiters etc while the caller simply has to specify the type values + * in the format specifier + value(s). + */ +#define btf_dump_type_values(d, fmt, ...) 
\ + btf_dump_printf(d, fmt "%s%s", \ + ##__VA_ARGS__, \ + btf_dump_data_delim(d), \ + btf_dump_data_newline(d)) + +static int btf_dump_unsupported_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id) +{ + btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t)); + return -ENOTSUP; +} + +static int btf_dump_get_bitfield_value(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz, + __u64 *value) +{ + __u16 left_shift_bits, right_shift_bits; + __u8 nr_copy_bits, nr_copy_bytes; + const __u8 *bytes = data; + int sz = t->size; + __u64 num = 0; + int i; + + /* Maximum supported bitfield size is 64 bits */ + if (sz > 8) { + pr_warn("unexpected bitfield size %d\n", sz); + return -EINVAL; + } + + /* Bitfield value retrieval is done in two steps; first relevant bytes are + * stored in num, then we left/right shift num to eliminate irrelevant bits. + */ + nr_copy_bits = bit_sz + bits_offset; + nr_copy_bytes = t->size; +#if __BYTE_ORDER == __LITTLE_ENDIAN + for (i = nr_copy_bytes - 1; i >= 0; i--) + num = num * 256 + bytes[i]; +#elif __BYTE_ORDER == __BIG_ENDIAN + for (i = 0; i < nr_copy_bytes; i++) + num = num * 256 + bytes[i]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + left_shift_bits = 64 - nr_copy_bits; + right_shift_bits = 64 - bit_sz; + + *value = (num << left_shift_bits) >> right_shift_bits; + + return 0; +} + +static int btf_dump_bitfield_check_zero(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __u64 check_num; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num); + if (err) + return err; + if (check_num == 0) + return -ENODATA; + return 0; +} + +static int btf_dump_bitfield_data(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __u64 print_num; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num); + if (err) + return err; + + btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num); + + return 0; +} + +/* ints, floats and ptrs */ +static int btf_dump_base_type_check_zero(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + static __u8 bytecmp[16] = {}; + int nr_bytes; + + /* For pointer types, pointer size is not defined on a per-type basis. + * On dump creation however, we store the pointer size. + */ + if (btf_kind(t) == BTF_KIND_PTR) + nr_bytes = d->ptr_sz; + else + nr_bytes = t->size; + + if (nr_bytes < 1 || nr_bytes > 16) { + pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id); + return -EINVAL; + } + + if (memcmp(data, bytecmp, nr_bytes) == 0) + return -ENODATA; + return 0; +} + +static bool ptr_is_aligned(const void *data, int data_sz) +{ + return ((uintptr_t)data) % data_sz == 0; +} + +static int btf_dump_int_data(struct btf_dump *d, + const struct btf_type *t, + __u32 type_id, + const void *data, + __u8 bits_offset) +{ + __u8 encoding = btf_int_encoding(t); + bool sign = encoding & BTF_INT_SIGNED; + int sz = t->size; + + if (sz == 0) { + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + + /* handle packed int data - accesses of integers not aligned on + * int boundaries can cause problems on some platforms. 
+ */ + if (!ptr_is_aligned(data, sz)) + return btf_dump_bitfield_data(d, t, data, 0, 0); + + switch (sz) { + case 16: { + const __u64 *ints = data; + __u64 lsi, msi; + + /* avoid use of __int128 as some 32-bit platforms do not + * support it. + */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + lsi = ints[0]; + msi = ints[1]; +#elif __BYTE_ORDER == __BIG_ENDIAN + lsi = ints[1]; + msi = ints[0]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (msi == 0) + btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi); + else + btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi, + (unsigned long long)lsi); + break; + } + case 8: + if (sign) + btf_dump_type_values(d, "%lld", *(long long *)data); + else + btf_dump_type_values(d, "%llu", *(unsigned long long *)data); + break; + case 4: + if (sign) + btf_dump_type_values(d, "%d", *(__s32 *)data); + else + btf_dump_type_values(d, "%u", *(__u32 *)data); + break; + case 2: + if (sign) + btf_dump_type_values(d, "%d", *(__s16 *)data); + else + btf_dump_type_values(d, "%u", *(__u16 *)data); + break; + case 1: + if (d->typed_dump->is_array_char) { + /* check for null terminator */ + if (d->typed_dump->is_array_terminated) + break; + if (*(char *)data == '\0') { + d->typed_dump->is_array_terminated = true; + break; + } + if (isprint(*(char *)data)) { + btf_dump_type_values(d, "'%c'", *(char *)data); + break; + } + } + if (sign) + btf_dump_type_values(d, "%d", *(__s8 *)data); + else + btf_dump_type_values(d, "%u", *(__u8 *)data); + break; + default: + pr_warn("unexpected sz %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + return 0; +} + +union float_data { + long double ld; + double d; + float f; +}; + +static int btf_dump_float_data(struct btf_dump *d, + const struct btf_type *t, + __u32 type_id, + const void *data) +{ + const union float_data *flp = data; + union float_data fl; + int sz = t->size; + + /* handle unaligned data; copy to local union */ + if (!ptr_is_aligned(data, sz)) { + memcpy(&fl, data, sz); + flp = &fl; + } + + switch (sz) { + case 16: + btf_dump_type_values(d, "%Lf", flp->ld); + break; + case 8: + btf_dump_type_values(d, "%lf", flp->d); + break; + case 4: + btf_dump_type_values(d, "%f", flp->f); + break; + default: + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + return 0; +} + +static int btf_dump_var_data(struct btf_dump *d, + const struct btf_type *v, + __u32 id, + const void *data) +{ + enum btf_func_linkage linkage = btf_var(v)->linkage; + const struct btf_type *t; + const char *l; + __u32 type_id; + + switch (linkage) { + case BTF_FUNC_STATIC: + l = "static "; + break; + case BTF_FUNC_EXTERN: + l = "extern "; + break; + case BTF_FUNC_GLOBAL: + default: + l = ""; + break; + } + + /* format of output here is [linkage] [type] [varname] = (type)value, + * for example "static int cpu_profile_flip = (int)1" + */ + btf_dump_printf(d, "%s", l); + type_id = v->type; + t = btf__type_by_id(d->btf, type_id); + btf_dump_emit_type_cast(d, type_id, false); + btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off)); + return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); +} + +static int btf_dump_array_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + const struct btf_type *elem_type; + __u32 i, elem_size = 0, elem_type_id; + bool is_array_member; + + elem_type_id = array->type; + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); + elem_size = btf__resolve_size(d->btf, 
elem_type_id); + if (elem_size <= 0) { + pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + return -EINVAL; + } + + if (btf_is_int(elem_type)) { + /* + * BTF_INT_CHAR encoding never seems to be set for + * char arrays, so if size is 1 and element is + * printable as a char, we'll do that. + */ + if (elem_size == 1) + d->typed_dump->is_array_char = true; + } + + /* note that we increment depth before calling btf_dump_print() below; + * this is intentional. btf_dump_data_newline() will not print a + * newline for depth 0 (since this leaves us with trailing newlines + * at the end of typed display), so depth is incremented first. + * For similar reasons, we decrement depth before showing the closing + * parenthesis. + */ + d->typed_dump->depth++; + btf_dump_printf(d, "[%s", btf_dump_data_newline(d)); + + /* may be a multidimensional array, so store current "is array member" + * status so we can restore it correctly later. + */ + is_array_member = d->typed_dump->is_array_member; + d->typed_dump->is_array_member = true; + for (i = 0; i < array->nelems; i++, data += elem_size) { + if (d->typed_dump->is_array_terminated) + break; + btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); + } + d->typed_dump->is_array_member = is_array_member; + d->typed_dump->depth--; + btf_dump_data_pfx(d); + btf_dump_type_values(d, "]"); + + return 0; +} + +static int btf_dump_struct_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_member *m = btf_members(t); + __u16 n = btf_vlen(t); + int i, err; + + /* note that we increment depth before calling btf_dump_print() below; + * this is intentional. btf_dump_data_newline() will not print a + * newline for depth 0 (since this leaves us with trailing newlines + * at the end of typed display), so depth is incremented first. + * For similar reasons, we decrement depth before showing the closing + * parenthesis. 
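
As an aside, the two-shift technique used by btf_dump_get_bitfield_value() earlier in this hunk is easy to try standalone: the covering bytes are assembled into a u64, then a left/right shift pair strips the unrelated high and low bits. A minimal sketch, assuming little-endian byte order and hypothetical field parameters:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* same approach as btf_dump_get_bitfield_value() above: assemble the
     * covering bytes into a u64, then shift out unrelated bits;
     * little-endian byte order is assumed in this sketch
     */
    static uint64_t get_bitfield(const uint8_t *bytes, int byte_sz,
                                 uint8_t bits_offset, uint8_t bit_sz)
    {
            uint64_t num = 0;
            int i;

            for (i = byte_sz - 1; i >= 0; i--)
                    num = num * 256 + bytes[i];

            /* left shift drops bits above the field, right shift drops
             * the bits below it and moves the field down to bit 0
             */
            return (num << (64 - (bits_offset + bit_sz))) >> (64 - bit_sz);
    }

    int main(void)
    {
            /* hypothetical layout: 7-bit field starting at bit 3 of a u32 */
            struct { uint32_t a:3, b:7, c:22; } s = { .a = 5, .b = 100, .c = 1234 };
            uint8_t raw[4];

            memcpy(raw, &s, sizeof(raw));
            printf("b = %llu\n", (unsigned long long)get_bitfield(raw, 4, 3, 7));
            return 0;
    }

On a typical little-endian ABI this prints b = 100.
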
+ */ + d->typed_dump->depth++; + btf_dump_printf(d, "{%s", btf_dump_data_newline(d)); + + for (i = 0; i < n; i++, m++) { + const struct btf_type *mtype; + const char *mname; + __u32 moffset; + __u8 bit_sz; + + mtype = btf__type_by_id(d->btf, m->type); + mname = btf_name_of(d, m->name_off); + moffset = btf_member_bit_offset(t, i); + + bit_sz = btf_member_bitfield_size(t, i); + err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8, + moffset % 8, bit_sz); + if (err < 0) + return err; + } + d->typed_dump->depth--; + btf_dump_data_pfx(d); + btf_dump_type_values(d, "}"); + return err; +} + +union ptr_data { + unsigned int p; + unsigned long long lp; +}; + +static int btf_dump_ptr_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { + btf_dump_type_values(d, "%p", *(void **)data); + } else { + union ptr_data pt; + + memcpy(&pt, data, d->ptr_sz); + if (d->ptr_sz == 4) + btf_dump_type_values(d, "0x%x", pt.p); + else + btf_dump_type_values(d, "0x%llx", pt.lp); + } + return 0; +} + +static int btf_dump_get_enum_value(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u32 id, + __s64 *value) +{ + int sz = t->size; + + /* handle unaligned enum value */ + if (!ptr_is_aligned(data, sz)) { + __u64 val; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val); + if (err) + return err; + *value = (__s64)val; + return 0; + } + + switch (t->size) { + case 8: + *value = *(__s64 *)data; + return 0; + case 4: + *value = *(__s32 *)data; + return 0; + case 2: + *value = *(__s16 *)data; + return 0; + case 1: + *value = *(__s8 *)data; + return 0; + default: + pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); + return -EINVAL; + } +} + +static int btf_dump_enum_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_enum *e; + __s64 value; + int i, err; + + err = btf_dump_get_enum_value(d, t, data, id, &value); + if (err) + return err; + + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { + if (value != e->val) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, "%d", value); + return 0; +} + +static int btf_dump_datasec_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_var_secinfo *vsi; + const struct btf_type *var; + __u32 i; + int err; + + btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off)); + + for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) { + var = btf__type_by_id(d->btf, vsi->type); + err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0); + if (err < 0) + return err; + btf_dump_printf(d, ";"); + } + return 0; +} + +/* return size of type, or if base type overflows, return -E2BIG. */ +static int btf_dump_type_data_check_overflow(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset) +{ + __s64 size = btf__resolve_size(d->btf, id); + + if (size < 0 || size >= INT_MAX) { + pr_warn("unexpected size [%zu] for id [%u]\n", + (size_t)size, id); + return -EINVAL; + } + + /* Only do overflow checking for base types; we do not want to + * avoid showing part of a struct, union or array, even if we + * do not have enough data to show the full object. 
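
btf_dump_enum_data() above prints the matching enumerator name and otherwise falls back to the raw number. A standalone sketch of that lookup, with made-up enumerator names:

    #include <stdio.h>

    struct enum_desc { const char *name; long long val; };

    /* like btf_dump_enum_data() above: print the matching enumerator
     * name, or fall back to the numeric value for unknown ones
     */
    static void print_enum(const struct enum_desc *e, int n, long long v)
    {
            int i;

            for (i = 0; i < n; i++) {
                    if (e[i].val == v) {
                            printf("%s\n", e[i].name);
                            return;
                    }
            }
            printf("%lld\n", v);
    }

    int main(void)
    {
            const struct enum_desc vals[] = { { "RED", 0 }, { "GREEN", 1 } };

            print_enum(vals, 2, 1);   /* GREEN */
            print_enum(vals, 2, 7);   /* 7 */
            return 0;
    }
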
By + * restricting overflow checking to base types we can ensure + * that partial display succeeds, while avoiding overflowing + * and using bogus data for display. + */ + t = skip_mods_and_typedefs(d->btf, id, NULL); + if (!t) { + pr_warn("unexpected error skipping mods/typedefs for id [%u]\n", + id); + return -EINVAL; + } + + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_PTR: + case BTF_KIND_ENUM: + if (data + bits_offset / 8 + size > d->typed_dump->data_end) + return -E2BIG; + break; + default: + break; + } + return (int)size; +} + +static int btf_dump_type_data_check_zero(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __s64 value; + int i, err; + + /* toplevel exceptions; we show zero values if + * - we ask for them (emit_zeros) + * - if we are at top-level so we see "struct empty { }" + * - or if we are an array member and the array is non-empty and + * not a char array; we don't want to be in a situation where we + * have an integer array 0, 1, 0, 1 and only show non-zero values. + * If the array contains zeroes only, or is a char array starting + * with a '\0', the array-level check_zero() will prevent showing it; + * we are concerned with determining zero value at the array member + * level here. + */ + if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 || + (d->typed_dump->is_array_member && + !d->typed_dump->is_array_char)) + return 0; + + t = skip_mods_and_typedefs(d->btf, id, NULL); + + switch (btf_kind(t)) { + case BTF_KIND_INT: + if (bit_sz) + return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz); + return btf_dump_base_type_check_zero(d, t, id, data); + case BTF_KIND_FLOAT: + case BTF_KIND_PTR: + return btf_dump_base_type_check_zero(d, t, id, data); + case BTF_KIND_ARRAY: { + const struct btf_array *array = btf_array(t); + const struct btf_type *elem_type; + __u32 elem_type_id, elem_size; + bool ischar; + + elem_type_id = array->type; + elem_size = btf__resolve_size(d->btf, elem_type_id); + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); + + ischar = btf_is_int(elem_type) && elem_size == 1; + + /* check all elements; if _any_ element is nonzero, all + * of array is displayed. We make an exception however + * for char arrays where the first element is 0; these + * are considered zeroed also, even if later elements are + * non-zero because the string is terminated. + */ + for (i = 0; i < array->nelems; i++) { + if (i == 0 && ischar && *(char *)data == 0) + return -ENODATA; + err = btf_dump_type_data_check_zero(d, elem_type, + elem_type_id, + data + + (i * elem_size), + bits_offset, 0); + if (err != -ENODATA) + return err; + } + return -ENODATA; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 n = btf_vlen(t); + + /* if any struct/union member is non-zero, the struct/union + * is considered non-zero and dumped. + */ + for (i = 0; i < n; i++, m++) { + const struct btf_type *mtype; + __u32 moffset; + + mtype = btf__type_by_id(d->btf, m->type); + moffset = btf_member_bit_offset(t, i); + + /* btf_int_bits() does not store member bitfield size; + * bitfield size needs to be stored here so int display + * of member can retrieve it. 
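
The array rule above (dump if any element is non-zero, but treat a char array with a leading NUL as zeroed) can be restated as a small self-contained check; names here are illustrative only:

    #include <errno.h>
    #include <stdio.h>

    /* toy restatement of the array rule above: a non-char array is
     * dumped if any element is non-zero; a char array whose first byte
     * is '\0' counts as zeroed even when bytes past the terminator
     * are not
     */
    static int array_check_zero(const char *data, int n, int is_char)
    {
            int i;

            if (is_char && data[0] == '\0')
                    return -ENODATA;
            for (i = 0; i < n; i++) {
                    if (data[i] != 0)
                            return 0;
            }
            return -ENODATA;
    }

    int main(void)
    {
            char buf[4] = { 0, 'x', 0, 0 };

            printf("as ints:  %s\n", array_check_zero(buf, 4, 0) ? "skip" : "dump");
            printf("as chars: %s\n", array_check_zero(buf, 4, 1) ? "skip" : "dump");
            return 0;
    }
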
+ */
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err != -ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_ENUM:
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+ if (value == 0)
+ return -ENODATA;
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ int size, err;
+
+ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+ if (size < 0)
+ return size;
+ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+ if (err) {
+ /* zeroed data is expected and not an error, so simply skip
+ * dumping such data. Record other errors however.
+ */
+ if (err == -ENODATA)
+ return size;
+ return err;
+ }
+ btf_dump_data_pfx(d);
+
+ if (!d->typed_dump->skip_names) {
+ if (fname && strlen(fname) > 0)
+ btf_dump_printf(d, ".%s = ", fname);
+ btf_dump_emit_type_cast(d, id, true);
+ }
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FUNC_PROTO:
+ err = btf_dump_unsupported_data(d, t, id);
+ break;
+ case BTF_KIND_INT:
+ if (bit_sz)
+ err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+ else
+ err = btf_dump_int_data(d, t, id, data, bits_offset);
+ break;
+ case BTF_KIND_FLOAT:
+ err = btf_dump_float_data(d, t, id, data);
+ break;
+ case BTF_KIND_PTR:
+ err = btf_dump_ptr_data(d, t, id, data);
+ break;
+ case BTF_KIND_ARRAY:
+ err = btf_dump_array_data(d, t, id, data);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ err = btf_dump_struct_data(d, t, id, data);
+ break;
+ case BTF_KIND_ENUM:
+ /* handle bitfield and int enum values */
+ if (bit_sz) {
+ __u64 print_num;
+ __s64 enum_val;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+ &print_num);
+ if (err)
+ break;
+ enum_val = (__s64)print_num;
+ err = btf_dump_enum_data(d, t, id, &enum_val);
+ } else
+ err = btf_dump_enum_data(d, t, id, data);
+ break;
+ case BTF_KIND_VAR:
+ err = btf_dump_var_data(d, t, id, data);
+ break;
+ case BTF_KIND_DATASEC:
+ err = btf_dump_datasec_data(d, t, id, data);
+ break;
+ default:
+ pr_warn("unexpected kind [%u] for id [%u]\n",
+ BTF_INFO_KIND(t->info), id);
+ return -EINVAL;
+ }
+ if (err < 0)
+ return err;
+ return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts)
+{
+ struct btf_dump_data typed_dump = {};
+ const struct btf_type *t;
+ int ret;
+
+ if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+ return libbpf_err(-EINVAL);
+
+ t = btf__type_by_id(d->btf, id);
+ if (!t)
+ return libbpf_err(-ENOENT);
+
+ d->typed_dump = &typed_dump;
+ d->typed_dump->data_end = data + data_sz;
+ d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+ /* default indent string is a tab */
+ if (!opts->indent_str)
+ d->typed_dump->indent_str[0] = '\t';
+ else
+ strncat(d->typed_dump->indent_str, opts->indent_str,
+ sizeof(d->typed_dump->indent_str) - 1);
+
+ d->typed_dump->compact = OPTS_GET(opts, compact, false);
+ d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+ d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+ ret = btf_dump_dump_type_data(d,
NULL, t, id, data, 0, 0); + + d->typed_dump = NULL; + + return libbpf_err(ret); +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6f5e2757bb3c..cb106e8c42cb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,6 +498,10 @@ struct bpf_object { * it at load time. */ struct btf *btf_vmlinux; + /* Path to the custom BTF to be used for BPF CO-RE relocations as an + * override for vmlinux BTF. + */ + char *btf_custom_path; /* vmlinux BTF override for CO-RE relocations */ struct btf *btf_vmlinux_override; /* Lazily initialized kernel module BTFs */ @@ -591,11 +595,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn) insn->off == 0; } -static bool is_ldimm64_insn(struct bpf_insn *insn) -{ - return insn->code == (BPF_LD | BPF_IMM | BPF_DW); -} - static bool is_call_insn(const struct bpf_insn *insn) { return insn->code == (BPF_JMP | BPF_CALL); @@ -2645,8 +2644,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) struct bpf_program *prog; int i; - /* CO-RE relocations need kernel BTF */ - if (obj->btf_ext && obj->btf_ext->core_relo_info.len) + /* CO-RE relocations need kernel BTF, only when btf_custom_path + * is not specified + */ + if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) return true; /* Support for typed ksyms needs kernel BTF */ @@ -2679,7 +2680,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) if (!force && !obj_needs_vmlinux_btf(obj)) return 0; - obj->btf_vmlinux = libbpf_find_kernel_btf(); + obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { pr_warn("Error loading vmlinux BTF: %d\n", err); @@ -2768,7 +2769,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - err = btf__load(kern_btf); + err = btf__load_into_kernel(kern_btf); } if (sanitize) { if (!err) { @@ -3894,6 +3895,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) return 0; } +static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) +{ + char file[PATH_MAX], buff[4096]; + FILE *fp; + __u32 val; + int err; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + memset(info, 0, sizeof(*info)); + + fp = fopen(file, "r"); + if (!fp) { + err = -errno; + pr_warn("failed to open %s: %d. 
No procfs support?\n", file, + err); + return err; + } + + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "map_type:\t%u", &val) == 1) + info->type = val; + else if (sscanf(buff, "key_size:\t%u", &val) == 1) + info->key_size = val; + else if (sscanf(buff, "value_size:\t%u", &val) == 1) + info->value_size = val; + else if (sscanf(buff, "max_entries:\t%u", &val) == 1) + info->max_entries = val; + else if (sscanf(buff, "map_flags:\t%i", &val) == 1) + info->map_flags = val; + } + + fclose(fp); + + return 0; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -3902,6 +3939,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) char *new_name; err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err && errno == EINVAL) + err = bpf_get_map_info_from_fdinfo(fd, &info); if (err) return libbpf_err(err); @@ -4381,12 +4420,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) struct bpf_map_info map_info = {}; char msg[STRERR_BUFSIZE]; __u32 map_info_len; + int err; map_info_len = sizeof(map_info); - if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { - pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + if (err && errno == EINVAL) + err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); + if (err) { + pr_warn("failed to get map info for map FD %d: %s\n", map_fd, + libbpf_strerror_r(errno, msg, sizeof(msg))); return false; } @@ -4479,6 +4522,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b { struct bpf_create_map_attr create_attr; struct bpf_map_def *def = &map->def; + int err = 0; memset(&create_attr, 0, sizeof(create_attr)); @@ -4521,8 +4565,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { - int err; - err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", @@ -4547,8 +4589,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; - int err = -errno; + err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, cp, err); @@ -4560,8 +4602,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->fd = bpf_create_map_xattr(&create_attr); } - if (map->fd < 0) - return -errno; + err = map->fd < 0 ? 
-errno : 0; if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) @@ -4570,7 +4611,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return 0; + return err; } static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4616,10 +4657,13 @@ bpf_object__create_maps(struct bpf_object *obj) char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; + bool retried; for (i = 0; i < obj->nr_maps; i++) { map = &obj->maps[i]; + retried = false; +retry: if (map->pin_path) { err = bpf_object__reuse_map(map); if (err) { @@ -4627,6 +4671,12 @@ bpf_object__create_maps(struct bpf_object *obj) map->name); goto err_out; } + if (retried && map->fd < 0) { + pr_warn("map '%s': cannot find pinned map\n", + map->name); + err = -ENOENT; + goto err_out; + } } if (map->fd >= 0) { @@ -4660,9 +4710,13 @@ bpf_object__create_maps(struct bpf_object *obj) if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { + zclose(map->fd); + if (!retried && err == -EEXIST) { + retried = true; + goto retry; + } pr_warn("map '%s': failed to auto-pin at '%s': %d\n", map->name, map->pin_path, err); - zclose(map->fd); goto err_out; } } @@ -4679,279 +4733,6 @@ err_out: return err; } -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { - __u32 type_id; /* struct/union type or array element type */ - __u32 idx; /* field index or array index */ - const char *name; /* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { - const struct btf *btf; - /* high-level spec: named fields and array indices only */ - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; - /* original unresolved (no skip_mods_or_typedefs) root type ID */ - __u32 root_type_id; - /* CO-RE relocation kind */ - enum bpf_core_relo_kind relo_kind; - /* high-level spec length */ - int len; - /* raw, low-level spec: 1-to-1 with accessor spec string */ - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; - /* raw spec length */ - int raw_len; - /* field bit offset represented by spec */ - __u32 bit_offset; -}; - -static bool str_is_empty(const char *s) -{ - return !s || !s[0]; -} - -static bool is_flex_arr(const struct btf *btf, - const struct bpf_core_accessor *acc, - const struct btf_array *arr) -{ - const struct btf_type *t; - - /* not a flexible array, if not inside a struct or has non-zero size */ - if (!acc->name || arr->nelems > 0) - return false; - - /* has to be the last member of enclosing struct */ - t = btf__type_by_id(btf, acc->type_id); - return acc->idx == btf_vlen(t) - 1; -} - -static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; - case BPF_FIELD_EXISTS: return "field_exists"; - case BPF_FIELD_SIGNED: return "signed"; - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; - case BPF_TYPE_ID_LOCAL: return "local_type_id"; - case BPF_TYPE_ID_TARGET: return "target_type_id"; - case BPF_TYPE_EXISTS: return "type_exists"; - case BPF_TYPE_SIZE: return "type_size"; - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; - case BPF_ENUMVAL_VALUE: return "enumval_value"; - default: return "unknown"; - } -} - -static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_FIELD_BYTE_OFFSET: - case BPF_FIELD_BYTE_SIZE: - case BPF_FIELD_EXISTS: - case 
BPF_FIELD_SIGNED: - case BPF_FIELD_LSHIFT_U64: - case BPF_FIELD_RSHIFT_U64: - return true; - default: - return false; - } -} - -static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_TYPE_ID_LOCAL: - case BPF_TYPE_ID_TARGET: - case BPF_TYPE_EXISTS: - case BPF_TYPE_SIZE: - return true; - default: - return false; - } -} - -static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_ENUMVAL_EXISTS: - case BPF_ENUMVAL_VALUE: - return true; - default: - return false; - } -} - -/* - * Turn bpf_core_relo into a low- and high-level spec representation, - * validating correctness along the way, as well as calculating resulting - * field bit offset, specified by accessor string. Low-level spec captures - * every single level of nestedness, including traversing anonymous - * struct/union members. High-level one only captures semantically meaningful - * "turning points": named fields and array indicies. - * E.g., for this case: - * - * struct sample { - * int __unimportant; - * struct { - * int __1; - * int __2; - * int a[7]; - * }; - * }; - * - * struct sample *s = ...; - * - * int x = &s->a[3]; // access string = '0:1:2:3' - * - * Low-level spec has 1:1 mapping with each element of access string (it's - * just a parsed access string representation): [0, 1, 2, 3]. - * - * High-level spec will capture only 3 points: - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); - * - field 'a' access (corresponds to '2' in low-level spec); - * - array element #3 access (corresponds to '3' in low-level spec). - * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, - * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their - * spec and raw_spec are kept empty. - * - * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access - * string to specify enumerator's value index that need to be relocated. 
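
The worked example in the comment above can be checked with plain offsetof(): spec '0:1:2:3' walks s[0], the anonymous struct (member #1), field a (member #2), and element #3. A sketch reusing the comment's struct (GCC and Clang accept an array element as the member designator):

    #include <stdio.h>
    #include <stddef.h>

    struct sample {
            int __unimportant;
            struct {
                    int __1;
                    int __2;
                    int a[7];
            };
    };

    int main(void)
    {
            /* low-level spec [0, 1, 2, 3] resolves to &s->a[3]:
             * 4 (anon struct) + 8 (a within it) + 3 * 4 (element) = 24 bytes
             */
            printf("byte offset %zu, bit offset %zu\n",
                   offsetof(struct sample, a[3]),
                   offsetof(struct sample, a[3]) * 8);
            return 0;
    }

This prints byte offset 24, bit offset 192, matching the bit_offset accumulation done by the parsing code below.
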
- */ -static int bpf_core_parse_spec(const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) -{ - int access_idx, parsed_len, i; - struct bpf_core_accessor *acc; - const struct btf_type *t; - const char *name; - __u32 id; - __s64 sz; - - if (str_is_empty(spec_str) || *spec_str == ':') - return -EINVAL; - - memset(spec, 0, sizeof(*spec)); - spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; - - /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { - if (strcmp(spec_str, "0")) - return -EINVAL; - return 0; - } - - /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ - while (*spec_str) { - if (*spec_str == ':') - ++spec_str; - if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) - return -EINVAL; - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - spec_str += parsed_len; - spec->raw_spec[spec->raw_len++] = access_idx; - } - - if (spec->raw_len == 0) - return -EINVAL; - - t = skip_mods_and_typedefs(btf, type_id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[0]; - acc = &spec->spec[0]; - acc->type_id = id; - acc->idx = access_idx; - spec->len++; - - if (core_relo_is_enumval_based(relo_kind)) { - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) - return -EINVAL; - - /* record enumerator name in a first accessor */ - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); - return 0; - } - - if (!core_relo_is_field_based(relo_kind)) - return -EINVAL; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset = access_idx * sz * 8; - - for (i = 1; i < spec->raw_len; i++) { - t = skip_mods_and_typedefs(btf, id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[i]; - acc = &spec->spec[spec->len]; - - if (btf_is_composite(t)) { - const struct btf_member *m; - __u32 bit_offset; - - if (access_idx >= btf_vlen(t)) - return -EINVAL; - - bit_offset = btf_member_bit_offset(t, access_idx); - spec->bit_offset += bit_offset; - - m = btf_members(t) + access_idx; - if (m->name_off) { - name = btf__name_by_offset(btf, m->name_off); - if (str_is_empty(name)) - return -EINVAL; - - acc->type_id = id; - acc->idx = access_idx; - acc->name = name; - spec->len++; - } - - id = m->type; - } else if (btf_is_array(t)) { - const struct btf_array *a = btf_array(t); - bool flex; - - t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t) - return -EINVAL; - - flex = is_flex_arr(btf, acc - 1, a); - if (!flex && access_idx >= a->nelems) - return -EINVAL; - - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->len++; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset += access_idx * sz * 8; - } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - type_id, spec_str, i, id, btf_kind_str(t)); - return -EINVAL; - } - } - - return 0; -} - static bool bpf_core_is_flavor_sep(const char *s) { /* check X___Y name pattern, where X and Y are not underscores */ @@ -4964,7 +4745,7 @@ static bool bpf_core_is_flavor_sep(const char *s) * before last triple underscore. Struct name part after last triple * underscore is ignored by BPF CO-RE relocation during relocation matching. 
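
A standalone restatement of the flavor-name rule described above; type names are hypothetical, and the helpers mirror bpf_core_is_flavor_sep()/bpf_core_essential_name_len() from this file:

    #include <stdio.h>
    #include <string.h>

    /* X___Y separator check and essential-name length, restating the
     * rule described in the comment above
     */
    static int is_flavor_sep(const char *s)
    {
            return s[0] != '_' &&
                   s[1] == '_' && s[2] == '_' && s[3] == '_' &&
                   s[4] != '_';
    }

    static size_t essential_name_len(const char *name)
    {
            size_t n = strlen(name);
            int i;

            for (i = (int)n - 5; i >= 0; i--) {
                    if (is_flavor_sep(name + i))
                            return i + 1;
            }
            return n;
    }

    int main(void)
    {
            /* hypothetical flavor of a kernel struct name */
            const char *local = "task_struct___older_kernel";
            const char *targ = "task_struct";
            size_t len = essential_name_len(local);

            if (len == strlen(targ) && strncmp(local, targ, len) == 0)
                    printf("'%s' matches '%s'\n", local, targ);
            return 0;
    }
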
*/ -static size_t bpf_core_essential_name_len(const char *name) +size_t bpf_core_essential_name_len(const char *name) { size_t n = strlen(name); int i; @@ -4976,34 +4757,20 @@ static size_t bpf_core_essential_name_len(const char *name) return n; } -struct core_cand -{ - const struct btf *btf; - const struct btf_type *t; - const char *name; - __u32 id; -}; - -/* dynamically sized list of type IDs and its associated struct btf */ -struct core_cand_list { - struct core_cand *cands; - int len; -}; - -static void bpf_core_free_cands(struct core_cand_list *cands) +static void bpf_core_free_cands(struct bpf_core_cand_list *cands) { free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct core_cand *local_cand, +static int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t local_essent_len, const struct btf *targ_btf, const char *targ_btf_name, int targ_start_id, - struct core_cand_list *cands) + struct bpf_core_cand_list *cands) { - struct core_cand *new_cands, *cand; + struct bpf_core_cand *new_cands, *cand; const struct btf_type *t; const char *targ_name; size_t targ_essent_len; @@ -5139,11 +4906,11 @@ err_out: return 0; } -static struct core_cand_list * +static struct bpf_core_cand_list * bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) { - struct core_cand local_cand = {}; - struct core_cand_list *cands; + struct bpf_core_cand local_cand = {}; + struct bpf_core_cand_list *cands; const struct btf *main_btf; size_t local_essent_len; int err, i; @@ -5197,165 +4964,6 @@ err_out: return ERR_PTR(err); } -/* Check two types for compatibility for the purpose of field access - * relocation. const/volatile/restrict and typedefs are skipped to ensure we - * are relocating semantically compatible entities: - * - any two STRUCTs/UNIONs are compatible and can be mixed; - * - any two FWDs are compatible, if their names match (modulo flavor suffix); - * - any two PTRs are always compatible; - * - for ENUMs, names should be the same (ignoring flavor suffix) or at - * least one of enums should be anonymous; - * - for ENUMs, check sizes, names are ignored; - * - for INT, size and signedness are ignored; - * - any two FLOATs are always compatible; - * - for ARRAY, dimensionality is ignored, element types are checked for - * compatibility recursively; - * - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. 
- */ -static int bpf_core_fields_are_compat(const struct btf *local_btf, - __u32 local_id, - const struct btf *targ_btf, - __u32 targ_id) -{ - const struct btf_type *local_type, *targ_type; - -recur: - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_is_composite(local_type) && btf_is_composite(targ_type)) - return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_PTR: - case BTF_KIND_FLOAT: - return 1; - case BTF_KIND_FWD: - case BTF_KIND_ENUM: { - const char *local_name, *targ_name; - size_t local_len, targ_len; - - local_name = btf__name_by_offset(local_btf, - local_type->name_off); - targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); - local_len = bpf_core_essential_name_len(local_name); - targ_len = bpf_core_essential_name_len(targ_name); - /* one of them is anonymous or both w/ same flavor-less names */ - return local_len == 0 || targ_len == 0 || - (local_len == targ_len && - strncmp(local_name, targ_name, local_len) == 0); - } - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && - btf_int_offset(targ_type) == 0; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - default: - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); - return 0; - } -} - -/* - * Given single high-level named field accessor in local type, find - * corresponding high-level accessor for a target type. Along the way, - * maintain low-level spec for target as well. Also keep updating target - * bit offset. - * - * Searching is performed through recursive exhaustive enumeration of all - * fields of a struct/union. If there are any anonymous (embedded) - * structs/unions, they are recursively searched as well. If field with - * desired name is found, check compatibility between local and target types, - * before returning result. - * - * 1 is returned, if field is found. - * 0 is returned if no compatible field is found. - * <0 is returned on error. 
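
The recursive descent described above is the interesting part of bpf_core_match_member() below: named members are compared directly, anonymous (embedded) members are descended into. A toy version over a simplified member representation (names and types here are illustrative, not BTF):

    #include <stdio.h>
    #include <string.h>

    struct toy_member {
            const char *name;              /* NULL for anonymous members */
            const struct toy_member *sub;
            int n_sub;
    };

    static int find_member(const struct toy_member *m, int n, const char *want)
    {
            int i;

            for (i = 0; i < n; i++) {
                    if (!m[i].name) {
                            /* embedded struct/union, go deeper */
                            if (find_member(m[i].sub, m[i].n_sub, want))
                                    return 1;
                    } else if (strcmp(m[i].name, want) == 0) {
                            return 1;
                    }
            }
            return 0;
    }

    int main(void)
    {
            const struct toy_member inner[] = { { "a", NULL, 0 }, { "b", NULL, 0 } };
            const struct toy_member outer[] = { { "x", NULL, 0 }, { NULL, inner, 2 } };

            printf("found b: %d\n", find_member(outer, 2, "b"));
            return 0;
    }

The real function additionally tracks the accumulated bit offset and checks type compatibility before accepting a match.
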
- */ -static int bpf_core_match_member(const struct btf *local_btf, - const struct bpf_core_accessor *local_acc, - const struct btf *targ_btf, - __u32 targ_id, - struct bpf_core_spec *spec, - __u32 *next_targ_id) -{ - const struct btf_type *local_type, *targ_type; - const struct btf_member *local_member, *m; - const char *local_name, *targ_name; - __u32 local_id; - int i, n, found; - - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!targ_type) - return -EINVAL; - if (!btf_is_composite(targ_type)) - return 0; - - local_id = local_acc->type_id; - local_type = btf__type_by_id(local_btf, local_id); - local_member = btf_members(local_type) + local_acc->idx; - local_name = btf__name_by_offset(local_btf, local_member->name_off); - - n = btf_vlen(targ_type); - m = btf_members(targ_type); - for (i = 0; i < n; i++, m++) { - __u32 bit_offset; - - bit_offset = btf_member_bit_offset(targ_type, i); - - /* too deep struct/union/array nesting */ - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - /* speculate this member will be the good one */ - spec->bit_offset += bit_offset; - spec->raw_spec[spec->raw_len++] = i; - - targ_name = btf__name_by_offset(targ_btf, m->name_off); - if (str_is_empty(targ_name)) { - /* embedded struct/union, we need to go deeper */ - found = bpf_core_match_member(local_btf, local_acc, - targ_btf, m->type, - spec, next_targ_id); - if (found) /* either found or error */ - return found; - } else if (strcmp(local_name, targ_name) == 0) { - /* matching named field */ - struct bpf_core_accessor *targ_acc; - - targ_acc = &spec->spec[spec->len++]; - targ_acc->type_id = targ_id; - targ_acc->idx = i; - targ_acc->name = targ_name; - - *next_targ_id = m->type; - found = bpf_core_fields_are_compat(local_btf, - local_member->type, - targ_btf, m->type); - if (!found) - spec->len--; /* pop accessor */ - return found; - } - /* member turned out not to be what we looked for */ - spec->bit_offset -= bit_offset; - spec->raw_len--; - } - - return 0; -} - /* Check local and target types for compatibility. This check is used for * type-based CO-RE relocations and follow slightly different rules than * field-based relocations. This function assumes that root types were already @@ -5375,8 +4983,8 @@ static int bpf_core_match_member(const struct btf *local_btf, * These rules are not set in stone and probably will be adjusted as we get * more experience with using BPF CO-RE relocations. */ -static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, - const struct btf *targ_btf, __u32 targ_id) +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) { const struct btf_type *local_type, *targ_type; int depth = 32; /* max recursion depth */ @@ -5450,671 +5058,6 @@ recur: } } -/* - * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + bit offset). 
- */ -static int bpf_core_spec_match(struct bpf_core_spec *local_spec, - const struct btf *targ_btf, __u32 targ_id, - struct bpf_core_spec *targ_spec) -{ - const struct btf_type *targ_type; - const struct bpf_core_accessor *local_acc; - struct bpf_core_accessor *targ_acc; - int i, sz, matched; - - memset(targ_spec, 0, sizeof(*targ_spec)); - targ_spec->btf = targ_btf; - targ_spec->root_type_id = targ_id; - targ_spec->relo_kind = local_spec->relo_kind; - - if (core_relo_is_type_based(local_spec->relo_kind)) { - return bpf_core_types_are_compat(local_spec->btf, - local_spec->root_type_id, - targ_btf, targ_id); - } - - local_acc = &local_spec->spec[0]; - targ_acc = &targ_spec->spec[0]; - - if (core_relo_is_enumval_based(local_spec->relo_kind)) { - size_t local_essent_len, targ_essent_len; - const struct btf_enum *e; - const char *targ_name; - - /* has to resolve to an enum */ - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); - if (!btf_is_enum(targ_type)) - return 0; - - local_essent_len = bpf_core_essential_name_len(local_acc->name); - - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); - targ_essent_len = bpf_core_essential_name_len(targ_name); - if (targ_essent_len != local_essent_len) - continue; - if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { - targ_acc->type_id = targ_id; - targ_acc->idx = i; - targ_acc->name = targ_name; - targ_spec->len++; - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; - targ_spec->raw_len++; - return 1; - } - } - return 0; - } - - if (!core_relo_is_field_based(local_spec->relo_kind)) - return -EINVAL; - - for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, - &targ_id); - if (!targ_type) - return -EINVAL; - - if (local_acc->name) { - matched = bpf_core_match_member(local_spec->btf, - local_acc, - targ_btf, targ_id, - targ_spec, &targ_id); - if (matched <= 0) - return matched; - } else { - /* for i=0, targ_id is already treated as array element - * type (because it's the original struct), for others - * we should find array element type first - */ - if (i > 0) { - const struct btf_array *a; - bool flex; - - if (!btf_is_array(targ_type)) - return 0; - - a = btf_array(targ_type); - flex = is_flex_arr(targ_btf, targ_acc - 1, a); - if (!flex && local_acc->idx >= a->nelems) - return 0; - if (!skip_mods_and_typedefs(targ_btf, a->type, - &targ_id)) - return -EINVAL; - } - - /* too deep struct/union/array nesting */ - if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - targ_acc->type_id = targ_id; - targ_acc->idx = local_acc->idx; - targ_acc->name = NULL; - targ_spec->len++; - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; - targ_spec->raw_len++; - - sz = btf__resolve_size(targ_btf, targ_id); - if (sz < 0) - return sz; - targ_spec->bit_offset += local_acc->idx * sz * 8; - } - } - - return 1; -} - -static int bpf_core_calc_field_relo(const struct bpf_program *prog, - const struct bpf_core_relo *relo, - const struct bpf_core_spec *spec, - __u32 *val, __u32 *field_sz, __u32 *type_id, - bool *validate) -{ - const struct bpf_core_accessor *acc; - const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; - const struct btf_member *m; - const struct btf_type *mt; - bool bitfield; - __s64 sz; - - *field_sz = 0; - - if (relo->kind == BPF_FIELD_EXISTS) { - *val = spec ? 
1 : 0; - return 0; - } - - if (!spec) - return -EUCLEAN; /* request instruction poisoning */ - - acc = &spec->spec[spec->len - 1]; - t = btf__type_by_id(spec->btf, acc->type_id); - - /* a[n] accessor needs special handling */ - if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { - *val = spec->bit_offset / 8; - /* remember field size for load/store mem size */ - sz = btf__resolve_size(spec->btf, acc->type_id); - if (sz < 0) - return -EINVAL; - *field_sz = sz; - *type_id = acc->type_id; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { - sz = btf__resolve_size(spec->btf, acc->type_id); - if (sz < 0) - return -EINVAL; - *val = sz; - } else { - pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", - prog->name, relo->kind, relo->insn_off / 8); - return -EINVAL; - } - if (validate) - *validate = true; - return 0; - } - - m = btf_members(t) + acc->idx; - mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); - bit_off = spec->bit_offset; - bit_sz = btf_member_bitfield_size(t, acc->idx); - - bitfield = bit_sz > 0; - if (bitfield) { - byte_sz = mt->size; - byte_off = bit_off / 8 / byte_sz * byte_sz; - /* figure out smallest int size necessary for bitfield load */ - while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { - if (byte_sz >= 8) { - /* bitfield can't be read with 64-bit read */ - pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", - prog->name, relo->kind, relo->insn_off / 8); - return -E2BIG; - } - byte_sz *= 2; - byte_off = bit_off / 8 / byte_sz * byte_sz; - } - } else { - sz = btf__resolve_size(spec->btf, field_type_id); - if (sz < 0) - return -EINVAL; - byte_sz = sz; - byte_off = spec->bit_offset / 8; - bit_sz = byte_sz * 8; - } - - /* for bitfields, all the relocatable aspects are ambiguous and we - * might disagree with compiler, so turn off validation of expected - * value, except for signedness - */ - if (validate) - *validate = !bitfield; - - switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: - *val = byte_off; - if (!bitfield) { - *field_sz = byte_sz; - *type_id = field_type_id; - } - break; - case BPF_FIELD_BYTE_SIZE: - *val = byte_sz; - break; - case BPF_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || - (btf_int_encoding(mt) & BTF_INT_SIGNED); - if (validate) - *validate = true; /* signedness is never ambiguous */ - break; - case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN - *val = 64 - (bit_off + bit_sz - byte_off * 8); -#else - *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); -#endif - break; - case BPF_FIELD_RSHIFT_U64: - *val = 64 - bit_sz; - if (validate) - *validate = true; /* right shift is never ambiguous */ - break; - case BPF_FIELD_EXISTS: - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, - const struct bpf_core_spec *spec, - __u32 *val) -{ - __s64 sz; - - /* type-based relos return zero when target type is not found */ - if (!spec) { - *val = 0; - return 0; - } - - switch (relo->kind) { - case BPF_TYPE_ID_TARGET: - *val = spec->root_type_id; - break; - case BPF_TYPE_EXISTS: - *val = 1; - break; - case BPF_TYPE_SIZE: - sz = btf__resolve_size(spec->btf, spec->root_type_id); - if (sz < 0) - return -EINVAL; - *val = sz; - break; - case BPF_TYPE_ID_LOCAL: - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, - const struct 
bpf_core_spec *spec, - __u32 *val) -{ - const struct btf_type *t; - const struct btf_enum *e; - - switch (relo->kind) { - case BPF_ENUMVAL_EXISTS: - *val = spec ? 1 : 0; - break; - case BPF_ENUMVAL_VALUE: - if (!spec) - return -EUCLEAN; /* request instruction poisoning */ - t = btf__type_by_id(spec->btf, spec->spec[0].type_id); - e = btf_enum(t) + spec->spec[0].idx; - *val = e->val; - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - -struct bpf_core_relo_res -{ - /* expected value in the instruction, unless validate == false */ - __u32 orig_val; - /* new value that needs to be patched up to */ - __u32 new_val; - /* relocation unsuccessful, poison instruction, but don't fail load */ - bool poison; - /* some relocations can't be validated against orig_val */ - bool validate; - /* for field byte offset relocations or the forms: - * *(T *)(rX + <off>) = rY - * rX = *(T *)(rY + <off>), - * we remember original and resolved field size to adjust direct - * memory loads of pointers and integers; this is necessary for 32-bit - * host kernel architectures, but also allows to automatically - * relocate fields that were resized from, e.g., u32 to u64, etc. - */ - bool fail_memsz_adjust; - __u32 orig_sz; - __u32 orig_type_id; - __u32 new_sz; - __u32 new_type_id; -}; - -/* Calculate original and target relocation values, given local and target - * specs and relocation kind. These values are calculated for each candidate. - * If there are multiple candidates, resulting values should all be consistent - * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. - * If instruction has to be poisoned, *poison will be set to true. - */ -static int bpf_core_calc_relo(const struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct bpf_core_spec *local_spec, - const struct bpf_core_spec *targ_spec, - struct bpf_core_relo_res *res) -{ - int err = -EOPNOTSUPP; - - res->orig_val = 0; - res->new_val = 0; - res->poison = false; - res->validate = true; - res->fail_memsz_adjust = false; - res->orig_sz = res->new_sz = 0; - res->orig_type_id = res->new_type_id = 0; - - if (core_relo_is_field_based(relo->kind)) { - err = bpf_core_calc_field_relo(prog, relo, local_spec, - &res->orig_val, &res->orig_sz, - &res->orig_type_id, &res->validate); - err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, - &res->new_val, &res->new_sz, - &res->new_type_id, NULL); - if (err) - goto done; - /* Validate if it's safe to adjust load/store memory size. - * Adjustments are performed only if original and new memory - * sizes differ. - */ - res->fail_memsz_adjust = false; - if (res->orig_sz != res->new_sz) { - const struct btf_type *orig_t, *new_t; - - orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); - new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); - - /* There are two use cases in which it's safe to - * adjust load/store's mem size: - * - reading a 32-bit kernel pointer, while on BPF - * size pointers are always 64-bit; in this case - * it's safe to "downsize" instruction size due to - * pointer being treated as unsigned integer with - * zero-extended upper 32-bits; - * - reading unsigned integers, again due to - * zero-extension is preserving the value correctly. - * - * In all other cases it's incorrect to attempt to - * load/store field because read value will be - * incorrect, so we poison relocated instruction. 
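
The safety argument above is easy to demonstrate in plain C: zero-extension preserves unsigned values (and pointers) across a narrowed load, but mangles negative signed values. A little-endian sketch:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            /* hypothetical resize of a field from 32 to 64 bits */
            uint64_t uval = 42;
            int64_t sval = -42;
            uint32_t lo;

            /* a narrowed 4-byte load of the low half (little-endian
             * assumed), followed by the implicit zero-extension BPF
             * performs on sub-register loads
             */
            memcpy(&lo, &uval, 4);
            printf("unsigned 42 -> %llu (preserved)\n", (unsigned long long)lo);

            memcpy(&lo, &sval, 4);
            printf("signed -42  -> %llu (corrupted by zero-extension)\n",
                   (unsigned long long)lo);
            return 0;
    }
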
- */ - if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) - goto done; - if (btf_is_int(orig_t) && btf_is_int(new_t) && - btf_int_encoding(orig_t) != BTF_INT_SIGNED && - btf_int_encoding(new_t) != BTF_INT_SIGNED) - goto done; - - /* mark as invalid mem size adjustment, but this will - * only be checked for LDX/STX/ST insns - */ - res->fail_memsz_adjust = true; - } - } else if (core_relo_is_type_based(relo->kind)) { - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); - } else if (core_relo_is_enumval_based(relo->kind)) { - err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); - } - -done: - if (err == -EUCLEAN) { - /* EUCLEAN is used to signal instruction poisoning request */ - res->poison = true; - err = 0; - } else if (err == -EOPNOTSUPP) { - /* EOPNOTSUPP means unknown/unsupported relocation */ - pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", - prog->name, relo_idx, core_relo_kind_str(relo->kind), - relo->kind, relo->insn_off / 8); - } - - return err; -} - -/* - * Turn instruction for which CO_RE relocation failed into invalid one with - * distinct signature. - */ -static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, - int insn_idx, struct bpf_insn *insn) -{ - pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", - prog->name, relo_idx, insn_idx); - insn->code = BPF_JMP | BPF_CALL; - insn->dst_reg = 0; - insn->src_reg = 0; - insn->off = 0; - /* if this instruction is reachable (not a dead code), - * verifier will complain with the following message: - * invalid func unknown#195896080 - */ - insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ -} - -static int insn_bpf_size_to_bytes(struct bpf_insn *insn) -{ - switch (BPF_SIZE(insn->code)) { - case BPF_DW: return 8; - case BPF_W: return 4; - case BPF_H: return 2; - case BPF_B: return 1; - default: return -1; - } -} - -static int insn_bytes_to_bpf_size(__u32 sz) -{ - switch (sz) { - case 8: return BPF_DW; - case 4: return BPF_W; - case 2: return BPF_H; - case 1: return BPF_B; - default: return -1; - } -} - -/* - * Patch relocatable BPF instruction. - * - * Patched value is determined by relocation kind and target specification. - * For existence relocations target spec will be NULL if field/type is not found. - * Expected insn->imm value is determined using relocation kind and local - * spec, and is checked before patching instruction. If actual insn->imm value - * is wrong, bail out with error. - * - * Currently supported classes of BPF instruction are: - * 1. rX = <imm> (assignment with immediate operand); - * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. rX = <imm64> (load with 64-bit immediate value); - * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; - * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; - * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. 
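
For the LDX/ST/STX case below, patching means rewriting both the 16-bit offset and the size bits of the opcode. A sketch with minimal local mirrors of the insn encoding macros (real code uses linux/bpf.h):

    #include <stdio.h>
    #include <stdint.h>

    /* minimal local mirrors of the insn encoding, enough for the sketch */
    struct toy_insn { uint8_t code; int16_t off; int32_t imm; };

    #define BPF_LDX         0x01
    #define BPF_MEM         0x60
    #define BPF_W           0x00    /* 4-byte access */
    #define BPF_DW          0x18    /* 8-byte access */
    #define BPF_CLASS(c)    ((c) & 0x07)
    #define BPF_MODE(c)     ((c) & 0xe0)

    int main(void)
    {
            /* rX = *(u32 *)(rY + 16), as emitted against local BTF */
            struct toy_insn insn = { .code = BPF_LDX | BPF_MEM | BPF_W, .off = 16 };

            /* CO-RE result: the field lives at offset 24 and grew to u64
             * on the target kernel, so both the offset and the size bits
             * are rewritten, like the LDX/ST/STX case in
             * bpf_core_patch_insn() below
             */
            insn.off = 24;
            insn.code = BPF_MODE(insn.code) | BPF_DW | BPF_CLASS(insn.code);

            printf("code=0x%02x off=%d\n", insn.code, insn.off); /* 0x79, 24 */
            return 0;
    }
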
- */ -static int bpf_core_patch_insn(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct bpf_core_relo_res *res) -{ - __u32 orig_val, new_val; - struct bpf_insn *insn; - int insn_idx; - __u8 class; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - - if (res->poison) { -poison: - /* poison second part of ldimm64 to avoid confusing error from - * verifier about "unknown opcode 00" - */ - if (is_ldimm64_insn(insn)) - bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); - bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); - return 0; - } - - orig_val = res->orig_val; - new_val = res->new_val; - - switch (class) { - case BPF_ALU: - case BPF_ALU64: - if (BPF_SRC(insn->code) != BPF_K) - return -EINVAL; - if (res->validate && insn->imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", - prog->name, relo_idx, - insn_idx, insn->imm, orig_val, new_val); - return -EINVAL; - } - orig_val = insn->imm; - insn->imm = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", - prog->name, relo_idx, insn_idx, - orig_val, new_val); - break; - case BPF_LDX: - case BPF_ST: - case BPF_STX: - if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); - return -EINVAL; - } - if (new_val > SHRT_MAX) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - prog->name, relo_idx, insn_idx, new_val); - return -ERANGE; - } - if (res->fail_memsz_adjust) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. 
" - "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", - prog->name, relo_idx, insn_idx); - goto poison; - } - - orig_val = insn->off; - insn->off = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - prog->name, relo_idx, insn_idx, orig_val, new_val); - - if (res->new_sz != res->orig_sz) { - int insn_bytes_sz, insn_bpf_sz; - - insn_bytes_sz = insn_bpf_size_to_bytes(insn); - if (insn_bytes_sz != res->orig_sz) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", - prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); - return -EINVAL; - } - - insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); - if (insn_bpf_sz < 0) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", - prog->name, relo_idx, insn_idx, res->new_sz); - return -EINVAL; - } - - insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", - prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); - } - break; - case BPF_LD: { - __u64 imm; - - if (!is_ldimm64_insn(insn) || - insn[0].src_reg != 0 || insn[0].off != 0 || - insn_idx + 1 >= prog->insns_cnt || - insn[1].code != 0 || insn[1].dst_reg != 0 || - insn[1].src_reg != 0 || insn[1].off != 0) { - pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", - prog->name, relo_idx, insn_idx); - return -EINVAL; - } - - imm = insn[0].imm + ((__u64)insn[1].imm << 32); - if (res->validate && imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", - prog->name, relo_idx, - insn_idx, (unsigned long long)imm, - orig_val, new_val); - return -EINVAL; - } - - insn[0].imm = new_val; - insn[1].imm = 0; /* currently only 32-bit values are supported */ - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", - prog->name, relo_idx, insn_idx, - (unsigned long long)imm, new_val); - break; - } - default: - pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", - prog->name, relo_idx, insn_idx, insn->code, - insn->src_reg, insn->dst_reg, insn->off, insn->imm); - return -EINVAL; - } - - return 0; -} - -/* Output spec definition in the format: - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b - */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) -{ - const struct btf_type *t; - const struct btf_enum *e; - const char *s; - __u32 type_id; - int i; - - type_id = spec->root_type_id; - t = btf__type_by_id(spec->btf, type_id); - s = btf__name_by_offset(spec->btf, t->name_off); - - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); - - if (core_relo_is_type_based(spec->relo_kind)) - return; - - if (core_relo_is_enumval_based(spec->relo_kind)) { - t = skip_mods_and_typedefs(spec->btf, type_id, NULL); - e = btf_enum(t) + spec->raw_spec[0]; - s = btf__name_by_offset(spec->btf, e->name_off); - - libbpf_print(level, "::%s = %u", s, e->val); - return; - } - - if (core_relo_is_field_based(spec->relo_kind)) { - for (i = 0; i < spec->len; i++) { - if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); - else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); - } - - libbpf_print(level, " ("); - for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - - if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); - else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; - } -} - static size_t bpf_core_hash_fn(const void *key, void *ctx) { return (size_t)key; @@ -6130,73 +5073,33 @@ static void *u32_as_hash_key(__u32 x) return (void *)(uintptr_t)x; } -/* - * CO-RE relocate single instruction. - * - * The outline and important points of the algorithm: - * 1. For given local type, find corresponding candidate target types. - * Candidate type is a type with the same "essential" name, ignoring - * everything after last triple underscore (___). E.g., `sample`, - * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates - * for each other. Names with triple underscore are referred to as - * "flavors" and are useful, among other things, to allow to - * specify/support incompatible variations of the same kernel struct, which - * might differ between different kernel versions and/or build - * configurations. - * - * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C - * converter, when deduplicated BTF of a kernel still contains more than - * one different types with the same name. In that case, ___2, ___3, etc - * are appended starting from second name conflict. But start flavors are - * also useful to be defined "locally", in BPF program, to extract same - * data from incompatible changes between different kernel - * versions/configurations. For instance, to handle field renames between - * kernel versions, one can use two flavors of the struct name with the - * same common name and use conditional relocations to extract that field, - * depending on target kernel version. - * 2. For each candidate type, try to match local specification to this - * candidate target type. Matching involves finding corresponding - * high-level spec accessors, meaning that all named fields should match, - * as well as all array accesses should be within the actual bounds. Also, - * types should be compatible (see bpf_core_fields_are_compat for details). - * 3. It is supported and expected that there might be multiple flavors - * matching the spec. As long as all the specs resolve to the same set of - * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. This is necessary to accomodate - * imprefection of BTF deduplication, which can cause slight duplication of - * the same BTF type, if some directly or indirectly referenced (by - * pointer) type gets resolved to different actual types in different - * object files. 
If such situation occurs, deduplicated BTF will end up - * with two (or more) structurally identical types, which differ only in - * types they refer to through pointer. This should be OK in most cases and - * is not an error. - * 4. Candidate types search is performed by linearly scanning through all - * types in target BTF. It is anticipated that this is overall more - * efficient memory-wise and not significantly worse (if not better) - * CPU-wise compared to prebuilding a map from all local type names to - * a list of candidate type names. It's also sped up by caching resolved - * list of matching candidates per each local "root" type ID, that has at - * least one bpf_core_relo associated with it. This list is shared - * between multiple relocations for the same type ID and is updated as some - * of the candidates are pruned due to structural incompatibility. - */ static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, struct hashmap *cand_cache) { - struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; const void *type_key = u32_as_hash_key(relo->type_id); - struct bpf_core_relo_res cand_res, targ_res; + struct bpf_core_cand_list *cands = NULL; + const char *prog_name = prog->name; const struct btf_type *local_type; const char *local_name; - struct core_cand_list *cands = NULL; - __u32 local_id; - const char *spec_str; - int i, j, err; + __u32 local_id = relo->type_id; + struct bpf_insn *insn; + int insn_idx, err; + + if (relo->insn_off % BPF_INSN_SZ) + return -EINVAL; + insn_idx = relo->insn_off / BPF_INSN_SZ; + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx > prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; - local_id = relo->type_id; local_type = btf__type_by_id(local_btf, local_id); if (!local_type) return -EINVAL; @@ -6205,51 +5108,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - if (prog->obj->gen_loader) { - pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", + pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", prog - prog->obj->programs, relo->insn_off / 8, - local_name, spec_str, relo->kind); + local_name, relo->kind); return -ENOTSUP; } - err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); - if (err) { - pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", - prog->name, relo_idx, local_id, btf_kind_str(local_type), - str_is_empty(local_name) ? 
"<anon>" : local_name, - spec_str, err); - return -EINVAL; - } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); - - /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_TYPE_ID_LOCAL) { - targ_res.validate = true; - targ_res.poison = false; - targ_res.orig_val = local_spec.root_type_id; - targ_res.new_val = local_spec.root_type_id; - goto patch_insn; - } - - /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { - pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", - prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); - return -EOPNOTSUPP; - } - - if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { + if (relo->kind != BPF_TYPE_ID_LOCAL && + !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", - prog->name, relo_idx, local_id, btf_kind_str(local_type), + prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cands)); return PTR_ERR(cands); } @@ -6260,97 +5131,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - for (i = 0, j = 0; i < cands->len; i++) { - err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, - cands->cands[i].id, &cand_spec); - if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog->name, relo_idx, i); - bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); - return err; - } - - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); - - if (err == 0) - continue; - - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); - if (err) - return err; - - if (j == 0) { - targ_res = cand_res; - targ_spec = cand_spec; - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { - /* if there are many field relo candidates, they - * should all resolve to the same bit offset - */ - pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog->name, relo_idx, cand_spec.bit_offset, - targ_spec.bit_offset); - return -EINVAL; - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { - /* all candidates should result in the same relocation - * decision and value, otherwise it's dangerous to - * proceed due to ambiguity - */ - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", - prog->name, relo_idx, - cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res.poison ? "failure" : "success", targ_res.new_val); - return -EINVAL; - } - - cands->cands[j++] = cands->cands[i]; - } - - /* - * For BPF_FIELD_EXISTS relo or when used BPF program has field - * existence checks or kernel version/config checks, it's expected - * that we might not find any candidates. In this case, if field - * wasn't found in any candidate, the list of candidates shouldn't - * change at all, we'll just handle relocating appropriately, - * depending on relo's kind. 
- */ - if (j > 0) - cands->len = j; - - /* - * If no candidates were found, it might be both a programmer error, - * as well as expected case, depending whether instruction w/ - * relocation is guarded in some way that makes it unreachable (dead - * code) if relocation can't be resolved. This is handled in - * bpf_core_patch_insn() uniformly by replacing that instruction with - * BPF helper call insn (using invalid helper ID). If that instruction - * is indeed unreachable, then it will be ignored and eliminated by - * verifier. If it was an error, then verifier will complain and point - * to a specific instruction number in its log. - */ - if (j == 0) { - pr_debug("prog '%s': relo #%d: no matching targets found\n", - prog->name, relo_idx); - - /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); - if (err) - return err; - } - -patch_insn: - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ - err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n", - prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err); - return -EINVAL; - } - - return 0; + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); } static int @@ -7190,7 +5971,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) for (i = 0; i < obj->nr_programs; i++) { struct bpf_program *p = &obj->programs[i]; - + if (!p->nr_reloc) continue; @@ -7554,7 +6335,7 @@ static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig; + const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_program *prog; struct bpf_object *obj; char tmp_name[64]; @@ -7585,11 +6366,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); + if (btf_tmp_path) { + if (strlen(btf_tmp_path) >= PATH_MAX) { + err = -ENAMETOOLONG; + goto out; + } + obj->btf_custom_path = strdup(btf_tmp_path); + if (!obj->btf_custom_path) { + err = -ENOMEM; + goto out; + } + } + kconfig = OPTS_GET(opts, kconfig, NULL); if (kconfig) { obj->kconfig = strdup(kconfig); - if (!obj->kconfig) - return ERR_PTR(-ENOMEM); + if (!obj->kconfig) { + err = -ENOMEM; + goto out; + } } err = bpf_object__elf_init(obj); @@ -8055,7 +6851,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); err = err ? 
: bpf_object__load_progs(obj, attr->log_level); if (obj->gen_loader) { @@ -8450,6 +7246,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map) return map->pin_path; } +const char *bpf_map__pin_path(const struct bpf_map *map) +{ + return map->pin_path; +} + bool bpf_map__is_pinned(const struct bpf_map *map) { return map->pinned; @@ -8702,6 +7503,7 @@ void bpf_object__close(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) bpf_map__destroy(&obj->maps[i]); + zfree(&obj->btf_custom_path); zfree(&obj->kconfig); zfree(&obj->externs); obj->nr_extern = 0; @@ -9471,7 +8273,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, ret = snprintf(btf_type_name, sizeof(btf_type_name), "%s%s", prefix, name); /* snprintf returns the number of characters written excluding the - * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it + * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it * indicates truncation. */ if (ret < 0 || ret >= sizeof(btf_type_name)) @@ -9495,7 +8297,7 @@ int libbpf_find_vmlinux_btf_id(const char *name, struct btf *btf; int err; - btf = libbpf_find_kernel_btf(); + btf = btf__load_vmlinux_btf(); err = libbpf_get_error(btf); if (err) { pr_warn("vmlinux BTF is not found\n"); @@ -9514,8 +8316,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { struct bpf_prog_info_linear *info_linear; struct bpf_prog_info *info; - struct btf *btf = NULL; - int err = -EINVAL; + struct btf *btf; + int err; info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); err = libbpf_get_error(info_linear); @@ -9524,12 +8326,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) attach_prog_fd); return err; } + + err = -EINVAL; info = &info_linear->info; if (!info->btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_warn("Failed to get BTF of the program\n"); goto out; } @@ -10013,7 +8818,7 @@ struct bpf_link { int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { int ret; - + ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); return libbpf_err_errno(ret); } @@ -10304,16 +9109,25 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, - bool retprobe, - const char *func_name) +struct bpf_link * +bpf_program__attach_kprobe_opts(struct bpf_program *prog, + const char *func_name, + struct bpf_kprobe_opts *opts) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; + unsigned long offset; + bool retprobe; int pfd, err; + if (!OPTS_VALID(opts, bpf_kprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + retprobe = OPTS_GET(opts, retprobe, false); + offset = OPTS_GET(opts, offset, 0); + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, - 0 /* offset */, -1 /* pid */); + offset, -1 /* pid */); if (pfd < 0) { pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", prog->name, retprobe ? 
"kretprobe" : "kprobe", func_name, @@ -10332,16 +9146,47 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return link; } +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, + bool retprobe, + const char *func_name) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, + .retprobe = retprobe, + ); + + return bpf_program__attach_kprobe_opts(prog, func_name, &opts); +} + static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, struct bpf_program *prog) { + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); + unsigned long offset = 0; + struct bpf_link *link; const char *func_name; - bool retprobe; + char *func; + int n, err; func_name = prog->sec_name + sec->len; - retprobe = strcmp(sec->sec, "kretprobe/") == 0; + opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; - return bpf_program__attach_kprobe(prog, retprobe, func_name); + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); + if (n < 1) { + err = -EINVAL; + pr_warn("kprobe name is invalid: %s\n", func_name); + return libbpf_err_ptr(err); + } + if (opts.retprobe && offset != 0) { + free(func); + err = -EINVAL; + pr_warn("kretprobes do not support offset specification\n"); + return libbpf_err_ptr(err); + } + + opts.offset = offset; + link = bpf_program__attach_kprobe_opts(prog, func, &opts); + free(func); + return link; } struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6e61342ba56c..1271d99bb7aa 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -94,8 +94,26 @@ struct bpf_object_open_opts { * system Kconfig for CONFIG_xxx externs. */ const char *kconfig; + /* Path to the custom BTF to be used for BPF CO-RE relocations. + * This custom BTF completely replaces the use of vmlinux BTF + * for the purpose of CO-RE relocations. + * NOTE: any other BPF feature (e.g., fentry/fexit programs, + * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. 
+ */ + const char *btf_custom_path; }; -#define bpf_object_open_opts__last_field kconfig +#define bpf_object_open_opts__last_field btf_custom_path + +struct bpf_kprobe_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* function offset to install kprobe at */ + unsigned long offset; + /* kprobe is return probe */ + bool retprobe; + size_t :0; +}; +#define bpf_kprobe_opts__last_field retprobe LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * @@ -243,6 +261,10 @@ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name); LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe_opts(struct bpf_program *prog, + const char *func_name, + struct bpf_kprobe_opts *opts); +LIBBPF_API struct bpf_link * bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); @@ -477,6 +499,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 944c99d1ded3..58e0fb2c482f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -371,7 +371,15 @@ LIBBPF_0.4.0 { LIBBPF_0.5.0 { global: bpf_map__initial_value; + bpf_map__pin_path; bpf_map_lookup_and_delete_elem_flags; + bpf_program__attach_kprobe_opts; bpf_object__gen_loader; + btf__load_from_kernel_by_id; + btf__load_from_kernel_by_id_split; + btf__load_into_kernel; + btf__load_module_btf; + btf__load_vmlinux_btf; + btf_dump__dump_type_data; libbpf_set_strict_mode; } LIBBPF_0.4.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 016ca7cb4f8a..f7b691d5f9eb 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -14,6 +14,7 @@ #include <errno.h> #include <linux/err.h> #include "libbpf_legacy.h" +#include "relo_core.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -366,76 +367,6 @@ struct bpf_line_info_min { __u32 line_col; }; -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations.
- */ -enum bpf_core_relo_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_TYPE_SIZE = 9, /* type size in bytes */ - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. 
- * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); @@ -494,4 +425,14 @@ static inline void *libbpf_ptr(void *ret) return ret; } +static inline bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +static inline bool is_ldimm64_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c new file mode 100644 index 000000000000..4016ed492d0c --- /dev/null +++ b/tools/lib/bpf/relo_core.c @@ -0,0 +1,1295 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Facebook */ + +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <linux/err.h> + +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" +#include "str_error.h" +#include "libbpf_internal.h" + +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field bit offset represented by spec */ + __u32 bit_offset; +}; + +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_FIELD_EXISTS: return "field_exists"; + case BPF_FIELD_SIGNED: return "signed"; + case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_TYPE_ID_TARGET: return "target_type_id"; + case BPF_TYPE_EXISTS: return "type_exists"; + case BPF_TYPE_SIZE: return "type_size"; + case BPF_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_ENUMVAL_VALUE: return "enumval_value"; + default: return "unknown"; + } +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: + case BPF_FIELD_BYTE_SIZE: + case BPF_FIELD_EXISTS: + case BPF_FIELD_SIGNED: + case BPF_FIELD_LSHIFT_U64: + case BPF_FIELD_RSHIFT_U64: + return true; + default: + return false; + } +} + +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_TYPE_ID_LOCAL: + 
case BPF_TYPE_ID_TARGET: + case BPF_TYPE_EXISTS: + case BPF_TYPE_SIZE: + return true; + default: + return false; + } +} + +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_ENUMVAL_EXISTS: + case BPF_ENUMVAL_VALUE: + return true; + default: + return false; + } +} + +/* + * Turn bpf_core_relo into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indices. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec). + * + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their + * spec and raw_spec are kept empty. + * + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access + * string to specify enumerator's value index that needs to be relocated. + */ +static int bpf_core_parse_spec(const struct btf *btf, + __u32 type_id, + const char *spec_str, + enum bpf_core_relo_kind relo_kind, + struct bpf_core_spec *spec) +{ + int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + spec->root_type_id = type_id; + spec->relo_kind = relo_kind; + + /* type-based relocations don't have a field access string */ + if (core_relo_is_type_based(relo_kind)) { + if (strcmp(spec_str, "0")) + return -EINVAL; + return 0; + } + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + acc = &spec->spec[0]; + acc->type_id = id; + acc->idx = access_idx; + spec->len++; + + if (core_relo_is_enumval_based(relo_kind)) { + if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + return -EINVAL; + + /* record enumerator name in the first accessor */ + acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + return 0; + } + + if (!core_relo_is_field_based(relo_kind)) + return -EINVAL; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset = access_idx * sz * 8; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx =
spec->raw_spec[i]; + acc = &spec->spec[spec->len]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 bit_offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + + bit_offset = btf_member_bit_offset(t, access_idx); + spec->bit_offset += bit_offset; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + bool flex; + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset += access_idx * sz * 8; + } else { + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + type_id, spec_str, i, id, btf_kind_str(t)); + return -EINVAL; + } + } + + return 0; +} + +/* Check two types for compatibility for the purpose of field access + * relocation. const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible, if their names match (modulo flavor suffix); + * - any two PTRs are always compatible; + * - for ENUMs, names should be the same (ignoring flavor suffix) or at + * least one of enums should be anonymous; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and signedness are ignored; + * - any two FLOATs are always compatible; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. 
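+ * + * For example (a hypothetical local/target pair, not from this patch), + * under these rules the 'cnt' fields in + * + * struct task___local { unsigned int cnt; }; + * struct task___targ { long cnt; }; + * + * are compatible, since for INT both size and signedness are ignored.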
+ */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + +recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_PTR: + case BTF_KIND_FLOAT: + return 1; + case BTF_KIND_FWD: + case BTF_KIND_ENUM: { + const char *local_name, *targ_name; + size_t local_len, targ_len; + + local_name = btf__name_by_offset(local_btf, + local_type->name_off); + targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); + local_len = bpf_core_essential_name_len(local_name); + targ_len = bpf_core_essential_name_len(targ_name); + /* one of them is anonymous or both w/ same flavor-less names */ + return local_len == 0 || targ_len == 0 || + (local_len == targ_len && + strncmp(local_name, targ_name, local_len) == 0); + } + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } +} + +/* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * bit offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. 
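+ * + * E.g. (hypothetical target layout), when searching for field 'c' in + * + * struct targ { int a; struct { int b; int c; }; }; + * + * the anonymous struct (member #1) is recursed into and 'c' is found as + * member #1 inside it, appending 1:1 to the low-level target spec.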
+ */ +static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) +{ + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 bit_offset; + + bit_offset = btf_member_bit_offset(targ_type, i); + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->bit_offset += bit_offset; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->bit_offset -= bit_offset; + spec->raw_len--; + } + + return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + bit offset). 
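+ * + * E.g. (sketch), a local spec with raw form 0:1 ('b' is field #1 locally) + * may resolve to target raw spec 0:2:0 if, on the target, 'b' was moved + * into an embedded anonymous struct at field #2; the bit offset is then + * recomputed from the target's actual layout.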
+ */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) +{ + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + targ_spec->root_type_id = targ_id; + targ_spec->relo_kind = local_spec->relo_kind; + + if (core_relo_is_type_based(local_spec->relo_kind)) { + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); + } + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + if (core_relo_is_enumval_based(local_spec->relo_kind)) { + size_t local_essent_len, targ_essent_len; + const struct btf_enum *e; + const char *targ_name; + + /* has to resolve to an enum */ + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); + if (!btf_is_enum(targ_type)) + return 0; + + local_essent_len = bpf_core_essential_name_len(local_acc->name); + + for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { + targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + return 1; + } + } + return 0; + } + + if (!core_relo_is_field_based(local_spec->relo_kind)) + return -EINVAL; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + bool flex; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->bit_offset += local_acc->idx * sz * 8; + } + } + + return 1; +} + +static int bpf_core_calc_field_relo(const char *prog_name, + const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val, __u32 *field_sz, __u32 *type_id, + bool *validate) +{ + const struct bpf_core_accessor *acc; + const struct btf_type *t; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; + const struct btf_member *m; + const struct btf_type *mt; + bool bitfield; + __s64 sz; + + *field_sz = 0; + + if (relo->kind == BPF_FIELD_EXISTS) { + *val = spec ? 
1 : 0; + return 0; + } + + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + + acc = &spec->spec[spec->len - 1]; + t = btf__type_by_id(spec->btf, acc->type_id); + + /* a[n] accessor needs special handling */ + if (!acc->name) { + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + *val = spec->bit_offset / 8; + /* remember field size for load/store mem size */ + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; + *type_id = acc->type_id; + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + } else { + pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", + prog_name, relo->kind, relo->insn_off / 8); + return -EINVAL; + } + if (validate) + *validate = true; + return 0; + } + + m = btf_members(t) + acc->idx; + mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); + bit_off = spec->bit_offset; + bit_sz = btf_member_bitfield_size(t, acc->idx); + + bitfield = bit_sz > 0; + if (bitfield) { + byte_sz = mt->size; + byte_off = bit_off / 8 / byte_sz * byte_sz; + /* figure out smallest int size necessary for bitfield load */ + while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { + if (byte_sz >= 8) { + /* bitfield can't be read with 64-bit read */ + pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", + prog_name, relo->kind, relo->insn_off / 8); + return -E2BIG; + } + byte_sz *= 2; + byte_off = bit_off / 8 / byte_sz * byte_sz; + } + } else { + sz = btf__resolve_size(spec->btf, field_type_id); + if (sz < 0) + return -EINVAL; + byte_sz = sz; + byte_off = spec->bit_offset / 8; + bit_sz = byte_sz * 8; + } + + /* for bitfields, all the relocatable aspects are ambiguous and we + * might disagree with compiler, so turn off validation of expected + * value, except for signedness + */ + if (validate) + *validate = !bitfield; + + switch (relo->kind) { + case BPF_FIELD_BYTE_OFFSET: + *val = byte_off; + if (!bitfield) { + *field_sz = byte_sz; + *type_id = field_type_id; + } + break; + case BPF_FIELD_BYTE_SIZE: + *val = byte_sz; + break; + case BPF_FIELD_SIGNED: + /* enums will be assumed unsigned */ + *val = btf_is_enum(mt) || + (btf_int_encoding(mt) & BTF_INT_SIGNED); + if (validate) + *validate = true; /* signedness is never ambiguous */ + break; + case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN + *val = 64 - (bit_off + bit_sz - byte_off * 8); +#else + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif + break; + case BPF_FIELD_RSHIFT_U64: + *val = 64 - bit_sz; + if (validate) + *validate = true; /* right shift is never ambiguous */ + break; + case BPF_FIELD_EXISTS: + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + __s64 sz; + + /* type-based relos return zero when target type is not found */ + if (!spec) { + *val = 0; + return 0; + } + + switch (relo->kind) { + case BPF_TYPE_ID_TARGET: + *val = spec->root_type_id; + break; + case BPF_TYPE_EXISTS: + *val = 1; + break; + case BPF_TYPE_SIZE: + sz = btf__resolve_size(spec->btf, spec->root_type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + break; + case BPF_TYPE_ID_LOCAL: + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, + const struct 
bpf_core_spec *spec, + __u32 *val) +{ + const struct btf_type *t; + const struct btf_enum *e; + + switch (relo->kind) { + case BPF_ENUMVAL_EXISTS: + *val = spec ? 1 : 0; + break; + case BPF_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + e = btf_enum(t) + spec->spec[0].idx; + *val = e->val; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +struct bpf_core_relo_res +{ + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + <off>) = rY + * rX = *(T *)(rY + <off>), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; +}; + +/* Calculate original and target relocation values, given local and target + * specs and relocation kind. These values are calculated for each candidate. + * If there are multiple candidates, resulting values should all be consistent + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. + * If instruction has to be poisoned, *poison will be set to true. + */ +static int bpf_core_calc_relo(const char *prog_name, + const struct bpf_core_relo *relo, + int relo_idx, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec, + struct bpf_core_relo_res *res) +{ + int err = -EOPNOTSUPP; + + res->orig_val = 0; + res->new_val = 0; + res->poison = false; + res->validate = true; + res->fail_memsz_adjust = false; + res->orig_sz = res->new_sz = 0; + res->orig_type_id = res->new_type_id = 0; + + if (core_relo_is_field_based(relo->kind)) { + err = bpf_core_calc_field_relo(prog_name, relo, local_spec, + &res->orig_val, &res->orig_sz, + &res->orig_type_id, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec, + &res->new_val, &res->new_sz, + &res->new_type_id, NULL); + if (err) + goto done; + /* Validate if it's safe to adjust load/store memory size. + * Adjustments are performed only if original and new memory + * sizes differ. + */ + res->fail_memsz_adjust = false; + if (res->orig_sz != res->new_sz) { + const struct btf_type *orig_t, *new_t; + + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + + /* There are two use cases in which it's safe to + * adjust load/store's mem size: + * - reading a 32-bit kernel pointer, while on BPF + * size pointers are always 64-bit; in this case + * it's safe to "downsize" instruction size due to + * pointer being treated as unsigned integer with + * zero-extended upper 32-bits; + * - reading unsigned integers, again due to + * zero-extension is preserving the value correctly. + * + * In all other cases it's incorrect to attempt to + * load/store field because read value will be + * incorrect, so we poison relocated instruction. 
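+ * + * E.g. (sketch): if local BTF records a field as u32 but target kernel + * BTF has it as u64, rX = *(u32 *)(rY + off) is patched into + * rX = *(u64 *)(rY + off); both loads zero-extend into rX, so the + * observed value is unchanged.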
+ */ + if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) + goto done; + if (btf_is_int(orig_t) && btf_is_int(new_t) && + btf_int_encoding(orig_t) != BTF_INT_SIGNED && + btf_int_encoding(new_t) != BTF_INT_SIGNED) + goto done; + + /* mark as invalid mem size adjustment, but this will + * only be checked for LDX/STX/ST insns + */ + res->fail_memsz_adjust = true; + } + } else if (core_relo_is_type_based(relo->kind)) { + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + } else if (core_relo_is_enumval_based(relo->kind)) { + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); + } + +done: + if (err == -EUCLEAN) { + /* EUCLEAN is used to signal instruction poisoning request */ + res->poison = true; + err = 0; + } else if (err == -EOPNOTSUPP) { + /* EOPNOTSUPP means unknown/unsupported relocation */ + pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), + relo->kind, relo->insn_off / 8); + } + + return err; +} + +/* + * Turn instruction for which CO-RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(const char *prog_name, int relo_idx, + int insn_idx, struct bpf_insn *insn) +{ + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + prog_name, relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_DW: return 8; + case BPF_W: return 4; + case BPF_H: return 2; + case BPF_B: return 1; + default: return -1; + } +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + +/* + * Patch relocatable BPF instruction. + * + * Patched value is determined by relocation kind and target specification. + * For existence relocations, target spec will be NULL if field/type is not found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error. + * + * Currently supported classes of BPF instruction are: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. rX = <imm64> (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
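+ * + * For example, for class 4 a field byte offset relocation patches only + * insn->off, so rX = *(u32 *)(rY + 8) may become rX = *(u32 *)(rY + 16); + * if the field was also resized, the mem size bits of insn->code are + * adjusted as well (see the LDX/ST/STX handling below).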
+ */ +static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res) +{ + __u32 orig_val, new_val; + __u8 class; + + class = BPF_CLASS(insn->code); + + if (res->poison) { +poison: + /* poison second part of ldimm64 to avoid confusing error from + * verifier about "unknown opcode 00" + */ + if (is_ldimm64_insn(insn)) + bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1); + bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn); + return 0; + } + + orig_val = res->orig_val; + new_val = res->new_val; + + switch (class) { + case BPF_ALU: + case BPF_ALU64: + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (res->validate && insn->imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + prog_name, relo_idx, + insn_idx, insn->imm, orig_val, new_val); + return -EINVAL; + } + orig_val = insn->imm; + insn->imm = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + prog_name, relo_idx, insn_idx, + orig_val, new_val); + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (res->validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", + prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + prog_name, relo_idx, insn_idx, new_val); + return -ERANGE; + } + if (res->fail_memsz_adjust) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " + "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", + prog_name, relo_idx, insn_idx); + goto poison; + } + + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + prog_name, relo_idx, insn_idx, orig_val, new_val); + + if (res->new_sz != res->orig_sz) { + int insn_bytes_sz, insn_bpf_sz; + + insn_bytes_sz = insn_bpf_size_to_bytes(insn); + if (insn_bytes_sz != res->orig_sz) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", + prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); + return -EINVAL; + } + + insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); + if (insn_bpf_sz < 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", + prog_name, relo_idx, insn_idx, res->new_sz); + return -EINVAL; + } + + insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", + prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz); + } + break; + case BPF_LD: { + __u64 imm; + + if (!is_ldimm64_insn(insn) || + insn[0].src_reg != 0 || insn[0].off != 0 || + insn[1].code != 0 || insn[1].dst_reg != 0 || + insn[1].src_reg != 0 || insn[1].off != 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", + prog_name, relo_idx, insn_idx); + return -EINVAL; + } + + imm = insn[0].imm + ((__u64)insn[1].imm << 32); + if (res->validate && imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + prog_name, relo_idx, + insn_idx, (unsigned long long)imm, + orig_val, new_val); + return -EINVAL; + } + + insn[0].imm = new_val; + insn[1].imm = 0; /* 
currently only 32-bit values are supported */ + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + prog_name, relo_idx, insn_idx, + (unsigned long long)imm, new_val); + break; + } + default: + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", + prog_name, relo_idx, insn_idx, insn->code, + insn->src_reg, insn->dst_reg, insn->off, insn->imm); + return -EINVAL; + } + + return 0; +} + +/* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ + const struct btf_type *t; + const struct btf_enum *e; + const char *s; + __u32 type_id; + int i; + + type_id = spec->root_type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + + if (core_relo_is_type_based(spec->relo_kind)) + return; + + if (core_relo_is_enumval_based(spec->relo_kind)) { + t = skip_mods_and_typedefs(spec->btf, type_id, NULL); + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + + libbpf_print(level, "::%s = %u", s, e->val); + return; + } + + if (core_relo_is_field_based(spec->relo_kind)) { + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else if (i > 0 || spec->spec[i].idx > 0) + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + + libbpf_print(level, " ("); + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + + if (spec->bit_offset % 8) + libbpf_print(level, " @ offset %u.%u)", + spec->bit_offset / 8, spec->bit_offset % 8); + else + libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); + return; + } +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow one to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one distinct type with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But struct flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type.
Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is no error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate + * imperfection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such a situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID that has at + * least one bpf_core_relo associated with it. This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ +int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands) +{ + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; + struct bpf_core_relo_res cand_res, targ_res; + const struct btf_type *local_type; + const char *local_name; + __u32 local_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (!local_name) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); + if (err) { + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), + str_is_empty(local_name) ?
"<anon>" : local_name, + spec_str, err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; + targ_res.orig_val = local_spec.root_type_id; + targ_res.new_val = local_spec.root_type_id; + goto patch_insn; + } + + /* libbpf doesn't support candidate search for anonymous types */ + if (str_is_empty(spec_str)) { + pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); + return -EOPNOTSUPP; + } + + + for (i = 0, j = 0; i < cands->len; i++) { + err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, + cands->cands[i].id, &cand_spec); + if (err < 0) { + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); + return err; + } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (err == 0) + continue; + + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + if (err) + return err; + + if (j == 0) { + targ_res = cand_res; + targ_spec = cand_spec; + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { + /* if there are many field relo candidates, they + * should all resolve to the same bit offset + */ + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.bit_offset, + targ_spec.bit_offset); + return -EINVAL; + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + /* all candidates should result in the same relocation + * decision and value, otherwise it's dangerous to + * proceed due to ambiguity + */ + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + prog_name, relo_idx, + cand_res.poison ? "failure" : "success", cand_res.new_val, + targ_res.poison ? "failure" : "success", targ_res.new_val); + return -EINVAL; + } + + cands->cands[j++] = cands->cands[i]; + } + + /* + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. + */ + if (j > 0) + cands->len = j; + + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_patch_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. 
+ */ + if (j == 0) { + pr_debug("prog '%s': relo #%d: no matching targets found\n", + prog_name, relo_idx); + + /* calculate single target relo result explicitly */ + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); + if (err) + return err; + } + +patch_insn: + /* bpf_core_patch_insn() should know how to handle missing targ_spec */ + err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", + prog_name, relo_idx, relo->insn_off / 8, err); + return -EINVAL; + } + + return 0; +} diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h new file mode 100644 index 000000000000..3b9f8f18346c --- /dev/null +++ b/tools/lib/bpf/relo_core.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __RELO_CORE_H +#define __RELO_CORE_H + +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. + */ +enum bpf_core_relo_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_TYPE_SIZE = 9, /* type size in bytes */ + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + +/* The minimum bpf_core_relo checked by the loader + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, string is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * + * An example, to provide a better feel: + * + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int x = &s->a; // encoded as "0:0" (a is field #0) + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. 
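+ * + * As a further illustration, for an enum value-based relocation (using a + * made-up enum for the sake of example): + * + * enum sample_mode { MODE_A, MODE_B, MODE_C }; + * + * a BPF_ENUMVAL_VALUE relocation against MODE_C would use access string + * "2" (MODE_C is enumerator #2 within its enum type), with type_id + * capturing the BTF type id of `enum sample_mode`.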
+ * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + +struct bpf_core_cand { + const struct btf *btf; + const struct btf_type *t; + const char *name; + __u32 id; +}; + +/* dynamically sized list of type IDs and its associated struct btf */ +struct bpf_core_cand_list { + struct bpf_core_cand *cands; + int len; +}; + +int bpf_core_apply_relo_insn(const char *prog_name, + struct bpf_insn *insn, int insn_idx, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands); +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id); + +size_t bpf_core_essential_name_len(const char *name); +#endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index cdecda1ddd36..996d025b8ed8 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, free(info_linear); return -1; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); err = -1; - btf = NULL; goto out; } perf_env__fetch_btf(env, info->btf_id, btf); @@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, out: free(info_linear); - free(btf); + btf__free(btf); return err ? 
-1 : 0; } @@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (btf_id == 0) goto out; - if (btf__get_from_id(btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, btf_id); goto out; @@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) perf_env__fetch_btf(env, btf_id, btf); out: - free(btf); + btf__free(btf); close(fd); } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 8150e03367bb..ba0f20853651 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd) return NULL; } - if (info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); goto out; } + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + pr_debug("failed to load btf for prog FD %d\n", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { @@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index fb010a35d61a..da9e8b699e42 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -38,6 +38,7 @@ TARGETS += mount_setattr TARGETS += mqueue TARGETS += nci TARGETS += net +TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 8deec1ca9150..9b17f2867488 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by default in ``~/.bpf_selftests``). +Script dependencies: +- clang (preferably built from sources, https://github.com/llvm/llvm-project); +- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/); +- qemu; +- docutils (for ``rst2man``); +- libcap-devel. + For more information about using the script, run: .. code-block:: console diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h index 81084c1c2c23..0ab1c88041cd 100644 --- a/tools/testing/selftests/bpf/netcnt_common.h +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -6,19 +6,39 @@ #define MAX_PERCPU_PACKETS 32 -struct percpu_net_cnt { - __u64 packets; - __u64 bytes; +/* sizeof(struct bpf_local_storage_elem): + * + * It really is about 128 bytes on x86_64, but allocate more to account for + * possible layout changes, different architectures, etc. + * The kernel will wrap up to PAGE_SIZE internally anyway. 
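+ * + * With the 256-byte estimate below, the derived maximum value size works + * out to 0xFFFF - 256 = 65279 bytes per local storage value.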
+ */ +#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256 - __u64 prev_ts; +/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */ +#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \ + SIZEOF_BPF_LOCAL_STORAGE_ELEM) - __u64 prev_packets; - __u64 prev_bytes; +#define PCPU_MIN_UNIT_SIZE 32768 + +union percpu_net_cnt { + struct { + __u64 packets; + __u64 bytes; + + __u64 prev_ts; + + __u64 prev_packets; + __u64 prev_bytes; + }; + __u8 data[PCPU_MIN_UNIT_SIZE]; }; -struct net_cnt { - __u64 packets; - __u64 bytes; +union net_cnt { + struct { + __u64 packets; + __u64 bytes; + }; + __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE]; }; #endif diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 2060bc122c53..26468a8f44f3 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms) #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +static int __start_server(int type, const struct sockaddr *addr, + socklen_t addrlen, int timeout_ms, bool reuseport) { - struct sockaddr_storage addr = {}; - socklen_t len; + int on = 1; int fd; - if (make_sockaddr(family, addr_str, port, &addr, &len)) - return -1; - - fd = socket(family, type, 0); + fd = socket(addr->sa_family, type, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port, if (settimeo(fd, timeout_ms)) goto error_close; - if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { + if (reuseport && + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { + log_err("Failed to set SO_REUSEPORT"); + return -1; + } + + if (bind(fd, addr, addrlen) < 0) { log_err("Failed to bind socket"); goto error_close; } @@ -104,6 +106,69 @@ error_close: return -1; } +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + return -1; + + return __start_server(type, (struct sockaddr *)&addr, + addrlen, timeout_ms, false); +} + +int *start_reuseport_server(int family, int type, const char *addr_str, + __u16 port, int timeout_ms, unsigned int nr_listens) +{ + struct sockaddr_storage addr; + unsigned int nr_fds = 0; + socklen_t addrlen; + int *fds; + + if (!nr_listens) + return NULL; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + return NULL; + + fds = malloc(sizeof(*fds) * nr_listens); + if (!fds) + return NULL; + + fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, + timeout_ms, true); + if (fds[0] == -1) + goto close_fds; + nr_fds = 1; + + if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) + goto close_fds; + + for (; nr_fds < nr_listens; nr_fds++) { + fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, + addrlen, timeout_ms, true); + if (fds[nr_fds] == -1) + goto close_fds; + } + + return fds; + +close_fds: + free_fds(fds, nr_fds); + return NULL; +} + +void free_fds(int *fds, unsigned int nr_close_fds) +{ + if (fds) { + while (nr_close_fds) + close(fds[--nr_close_fds]); + free(fds); + } +} + int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms) { @@ -217,6 +282,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, if 
(family == AF_INET) { struct sockaddr_in *sin = (void *)addr; + memset(addr, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_port = htons(port); if (addr_str && @@ -230,6 +296,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, } else if (family == AF_INET6) { struct sockaddr_in6 *sin6 = (void *)addr; + memset(addr, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); if (addr_str && diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5e0d51c07b63..d60bc2897770 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -36,6 +36,10 @@ extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); +int *start_reuseport_server(int family, int type, const char *addr_str, + __u16 port, int timeout_ms, + unsigned int nr_listens); +void free_fds(int *fds, unsigned int nr_close_fds); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); int fastopen_connect(int server_fd, const char *data, unsigned int data_len, diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c new file mode 100644 index 000000000000..85babb0487b3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include "network_helpers.h" +#include "bpf_dctcp.skel.h" +#include "bpf_cubic.skel.h" +#include "bpf_iter_setsockopt.skel.h" + +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return -1; + + return 0; +} + +static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds) +{ + unsigned int i; + + for (i = 0; i < nr_fds; i++) { + if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic", + sizeof("bpf_cubic"))) + return i; + } + + return nr_fds; +} + +static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds) +{ + char tcp_cc[16]; + socklen_t optlen = sizeof(tcp_cc); + unsigned int i; + + for (i = 0; i < nr_fds; i++) { + if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION, + tcp_cc, &optlen) || + strcmp(tcp_cc, "bpf_dctcp")) + return i; + } + + return nr_fds; +} + +static int *make_established(int listen_fd, unsigned int nr_est, + int **paccepted_fds) +{ + int *est_fds, *accepted_fds; + unsigned int i; + + est_fds = malloc(sizeof(*est_fds) * nr_est); + if (!est_fds) + return NULL; + + accepted_fds = malloc(sizeof(*accepted_fds) * nr_est); + if (!accepted_fds) { + free(est_fds); + return NULL; + } + + for (i = 0; i < nr_est; i++) { + est_fds[i] = connect_to_fd(listen_fd, 0); + if (est_fds[i] == -1) + break; + if (set_bpf_cubic(&est_fds[i], 1) != 1) { + close(est_fds[i]); + break; + } + + accepted_fds[i] = accept(listen_fd, NULL, 0); + if (accepted_fds[i] == -1) { + close(est_fds[i]); + break; + } + } + + if (!ASSERT_EQ(i, nr_est, "create established fds")) { + free_fds(accepted_fds, i); + free_fds(est_fds, i); + return NULL; + } + + *paccepted_fds = accepted_fds; + return est_fds; +} + +static unsigned short get_local_port(int fd) +{ + struct sockaddr_in6 addr; + socklen_t addrlen = sizeof(addr); + + if (!getsockname(fd, 
&addr, &addrlen)) + return ntohs(addr.sin6_port); + + return 0; +} + +static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, + bool random_retry) +{ + int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL; + unsigned int nr_reuse_listens = 256, nr_est = 256; + int err, iter_fd = -1, listen_fd = -1; + char buf; + + /* Prepare non-reuseport listen_fd */ + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(listen_fd, 0, "start_server")) + return; + if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1, + "set listen_fd to cubic")) + goto done; + iter_skel->bss->listen_hport = get_local_port(listen_fd); + if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0, + "get_local_port(listen_fd)")) + goto done; + + /* Connect to non-reuseport listen_fd */ + est_fds = make_established(listen_fd, nr_est, &accepted_fds); + if (!ASSERT_OK_PTR(est_fds, "create established")) + goto done; + + /* Prepare reuseport listen fds */ + reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM, + "::1", 0, 0, + nr_reuse_listens); + if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server")) + goto done; + if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens), + nr_reuse_listens, "set reuse_listen_fds to cubic")) + goto done; + iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]); + if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0, + "get_local_port(reuse_listen_fds[0])")) + goto done; + + /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */ + iter_skel->bss->random_retry = random_retry; + iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc)); + if (!ASSERT_GE(iter_fd, 0, "create iter_fd")) + goto done; + + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && + errno == EAGAIN) + ; + if (!ASSERT_OK(err, "read iter error")) + goto done; + + /* Check reuseport listen fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens), + nr_reuse_listens, + "check reuse_listen_fds dctcp"); + + /* Check non reuseport listen fd for dctcp */ + ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1, + "check listen_fd dctcp"); + + /* Check established fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est, + "check est_fds dctcp"); + + /* Check accepted fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est, + "check accepted_fds dctcp"); + +done: + if (iter_fd != -1) + close(iter_fd); + if (listen_fd != -1) + close(listen_fd); + free_fds(reuse_listen_fds, nr_reuse_listens); + free_fds(accepted_fds, nr_est); + free_fds(est_fds, nr_est); +} + +void test_bpf_iter_setsockopt(void) +{ + struct bpf_iter_setsockopt *iter_skel = NULL; + struct bpf_cubic *cubic_skel = NULL; + struct bpf_dctcp *dctcp_skel = NULL; + struct bpf_link *cubic_link = NULL; + struct bpf_link *dctcp_link = NULL; + + if (create_netns()) + return; + + /* Load iter_skel */ + iter_skel = bpf_iter_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(iter_skel, "iter_skel")) + return; + iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL); + if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter")) + goto done; + + /* Load bpf_cubic */ + cubic_skel = bpf_cubic__open_and_load(); + if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel")) + goto done; + cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); + if (!ASSERT_OK_PTR(cubic_link, "cubic_link")) + goto done; + + /* Load bpf_dctcp */ + dctcp_skel = bpf_dctcp__open_and_load(); + if 
(!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) + goto done; + dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); + if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link")) + goto done; + + do_bpf_iter_setsockopt(iter_skel, true); + do_bpf_iter_setsockopt(iter_skel, false); + +done: + bpf_link__destroy(cubic_link); + bpf_link__destroy(dctcp_link); + bpf_cubic__destroy(cubic_skel); + bpf_dctcp__destroy(dctcp_skel); + bpf_iter_setsockopt__destroy(iter_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 857e3f26086f..649f87382c8d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num) goto done; } - err = btf__get_from_id(info.btf_id, &btf); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); if (CHECK(err, "cannot get btf from kernel, err: %d", err)) goto done; @@ -4386,6 +4387,7 @@ skip: fprintf(stderr, "OK"); done: + btf__free(btf); free(func_info); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 1b90e684ff13..52ccf0cf35e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -232,7 +232,593 @@ err_out: btf__free(btf); } +#define STRSIZE 4096 + +static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args) +{ + char *s = ctx, new[STRSIZE]; + + vsnprintf(new, STRSIZE, fmt, args); + if (strlen(s) < STRSIZE) + strncat(s, new, STRSIZE - strlen(s) - 1); +} + +static int btf_dump_data(struct btf *btf, struct btf_dump *d, + char *name, char *prefix, __u64 flags, void *ptr, + size_t ptr_sz, char *str, const char *expected_val) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + size_t type_sz; + __s32 type_id; + int ret = 0; + + if (flags & BTF_F_COMPACT) + opts.compact = true; + if (flags & BTF_F_NONAME) + opts.skip_names = true; + if (flags & BTF_F_ZERO) + opts.emit_zeroes = true; + if (prefix) { + ASSERT_STRNEQ(name, prefix, strlen(prefix), + "verify prefix match"); + name += strlen(prefix) + 1; + } + type_id = btf__find_by_name(btf, name); + if (!ASSERT_GE(type_id, 0, "find type id")) + return -ENOENT; + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } else { + if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG")) + return -EINVAL; + } + if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ + _expected, ...) \ + do { \ + char __ptrtype[64] = #_type; \ + char *_ptrtype = (char *)__ptrtype; \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \ + _ptr, sizeof(_type), _str, \ + _expected); \ + } while (0) + +/* Use where expected data string matches its stringified declaration */ +#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \ + ...) 
\ + TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ + "(" #_type ")" #__VA_ARGS__, __VA_ARGS__) + +/* overflow test; pass typesize < expected type size, ensure E2BIG returned */ +#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \ + _expected, ...) \ + do { \ + char __ptrtype[64] = #_type; \ + char *_ptrtype = (char *)__ptrtype; \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \ + _ptr, _type_sz, _str, _expected); \ + } while (0) + +#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \ + _expected, ...) \ + do { \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \ + _ptr, sizeof(_type), _str, \ + _expected); \ + } while (0) + +static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, + char *str) +{ +#ifdef __SIZEOF_INT128__ + __int128 i = 0xffffffffffffffff; + + /* this dance is required because we cannot directly initialize + * a 128-bit value to anything larger than a 64-bit value. + */ + i = (i << 64) | (i - 1); +#endif + /* simple int */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "1234", 1234); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234); + + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO, + "(int)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "-4567", -4567); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567); + + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1); + +#ifdef __SIZEOF_INT128__ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT, + "(__int128)0xffffffffffffffff", + 0xffffffffffffffff); + ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str, + "(__int128)0xfffffffffffffffffffffffffffffffe"), + "dump __int128"); +#endif +} + +static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + float t1 = 1.234567; + float t2 = -1.234567; + float t3 = 0.0; + double t4 = 5.678912; + double t5 = -5.678912; + double t6 = 0.0; + long double t7 = 9.876543; + long double t8 = -9.876543; + long double t9 = 0.0; + + /* since the kernel does not likely have any float types in its BTF, we + * will need to add some of various sizes. 
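+ * + * btf__add_float(btf, name, byte_sz) appends a BTF_KIND_FLOAT type of the + * given byte size and returns the new type ID (positive) on success, which + * is what the ASSERT_GT() calls below check.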
+ */ + + ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str, + "(test_float)1.234567"), "dump float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str, + "(test_float)-1.234567"), "dump float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str, + "(test_float)0.000000"), "dump float"); + + ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str, + "(test_double)5.678912"), "dump double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str, + "(test_double)-5.678912"), "dump double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str, + "(test_double)0.000000"), "dump double"); + + ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16, + str, "(test_long_double)9.876543"), + "dump long_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16, + str, "(test_long_double)-9.876543"), + "dump long_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16, + str, "(test_long_double)0.000000"), + "dump long_double"); +} + +static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* simple char */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME, + "100", 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100); + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT, + "(char)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO, + "(char)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0); + + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100); +} + +static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* simple typedef */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, + "1", 1); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1); + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO, + "(u64)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0); + + /* typedef struct */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, + {.counter = (int)1,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME, + "{1,}", { .counter = 1 }); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, +"(atomic_t){\n" +" .counter = (int)1,\n" +"}", + {.counter = 1,}); + /* typedef with 0 value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, 
atomic_t, BTF_F_COMPACT | BTF_F_NONAME, + "{}", {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, +"(atomic_t){\n" +"}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO, + "(atomic_t){.counter = (int)0,}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "{0,}", {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO, +"(atomic_t){\n" +" .counter = (int)0,\n" +"}", + { .counter = 0,}); + + /* overflow should show type but not value since it overflows */ + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1, + "(atomic_t){\n", { .counter = 1}); +} + +static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* enum where enum value does (and does not) exist */ + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, + "(enum bpf_cmd)BPF_MAP_CREATE", 0); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME, + "BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, + "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "BPF_MAP_CREATE", 0); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_ZERO, + "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "BPF_MAP_CREATE", BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME, + "2000", 2000); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, + "(enum bpf_cmd)2000", 2000); + + TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd, + sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE); +} + +static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + char zero_data[512] = { }; + char type_data[512]; + void *fops = type_data; + void *skb = type_data; + size_t type_sz; + __s32 type_id; + char *cmpstr; + int ret; + + memset(type_data, 255, sizeof(type_data)); + + /* simple struct */ + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, + {.name_off = (__u32)3,.val = (__s32)-1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{3,-1,}", + { .name_off = 3, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, +"(struct btf_enum){\n" +" .name_off = (__u32)3,\n" +" .val = (__s32)-1,\n" +"}", + { .name_off = 3, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{-1,}", + { .name_off = 0, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "{0,-1,}", + { .name_off = 0, .val = -1,}); + /* empty struct should be printed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, + "(struct btf_enum){}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{}", + { .name_off = 0, .val = 0,}); + 
TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, +"(struct btf_enum){\n" +"}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_ZERO, + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_ZERO, +"(struct btf_enum){\n" +" .name_off = (__u32)0,\n" +" .val = (__s32)0,\n" +"}", + { .name_off = 0, .val = 0,}); + + /* struct with pointers */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, + "(struct list_head){.next = (struct list_head *)0x1,}", + { .next = (struct list_head *)1 }); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, +"(struct list_head){\n" +" .next = (struct list_head *)0x1,\n" +"}", + { .next = (struct list_head *)1 }); + /* NULL pointer should not be displayed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, + "(struct list_head){}", + { .next = (struct list_head *)0 }); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, +"(struct list_head){\n" +"}", + { .next = (struct list_head *)0 }); + + /* struct with function pointers */ + type_id = btf__find_by_name(btf, "file_operations"); + if (ASSERT_GT(type_id, 0, "find type id")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping file_operations"); + cmpstr = +"(struct file_operations){\n" +" .owner = (struct module *)0xffffffffffffffff,\n" +" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,"; + + ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations"); + } + + /* struct with char array */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}", + { .name = "foo",}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, + BTF_F_COMPACT | BTF_F_NONAME, + "{['f','o','o',],}", + {.name = "foo",}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0, +"(struct bpf_prog_info){\n" +" .name = (char[16])[\n" +" 'f',\n" +" 'o',\n" +" 'o',\n" +" ],\n" +"}", + {.name = "foo",}); + /* leading null char means do not display string */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){}", + {.name = {'\0', 'f', 'o', 'o'}}); + /* handle non-printable characters */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}", + { .name = {1, 2, 3, 0}}); + + /* struct with non-char array */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, + "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}", + { .cb = {1, 2, 3, 4, 5,},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, + BTF_F_COMPACT | BTF_F_NONAME, + "{[1,2,3,4,5,],}", + { .cb = { 1, 2, 3, 4, 5},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0, +"(struct __sk_buff){\n" +" .cb = (__u32[5])[\n" +" 1,\n" +" 2,\n" +" 3,\n" +" 4,\n" +" 5,\n" +" ],\n" +"}", + { .cb = { 1, 2, 3, 4, 5},}); + /* For non-char, arrays, show non-zero values only */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, + "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}", + { .cb = { 0, 0, 1, 0, 0},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct 
__sk_buff, 0, +"(struct __sk_buff){\n" +" .cb = (__u32[5])[\n" +" 0,\n" +" 0,\n" +" 1,\n" +" 0,\n" +" 0,\n" +" ],\n" +"}", + { .cb = { 0, 0, 1, 0, 0},}); + + /* struct with bitfields */ + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, + BTF_F_COMPACT | BTF_F_NONAME, + "{1,0x2,0x3,4,5,}", + { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, + .imm = 5,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0, +"(struct bpf_insn){\n" +" .code = (__u8)1,\n" +" .dst_reg = (__u8)0x2,\n" +" .src_reg = (__u8)0x3,\n" +" .off = (__s16)4,\n" +" .imm = (__s32)5,\n" +"}", + {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5}); + + /* zeroed bitfields should not be displayed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, + "(struct bpf_insn){.dst_reg = (__u8)0x1,}", + { .code = 0, .dst_reg = 1}); + + /* struct with enum bitfield */ + type_id = btf__find_by_name(btf, "fs_context"); + if (ASSERT_GT(type_id, 0, "find fs_context")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + opts.emit_zeroes = true; + ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping fs_context"); + + ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL, + "bitfield value not present"); + } + + /* struct with nested anon union */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT, + "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}", + { .op = 1, .args = { 1, 2, 3, 4}}); + + /* union with nested struct */ + TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT, + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}", + { .map = { .map_fd = 1 }}); + + /* struct skb with nested structs/unions; because type output is so + * complex, we don't do a string comparison, just verify we return + * the type size as the amount of data displayed. + */ + type_id = btf__find_by_name(btf, "sk_buff"); + if (ASSERT_GT(type_id, 0, "find struct sk_buff")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping sk_buff"); + } + + /* Overflow the bpf_sock_ops struct with the final element nonzero/zero. + * Regardless of the value of the final field, we don't have all the + * data we need to display it, so we should trigger an overflow. + * In other words, overflow checking should trump "is field zero?" + * checks because if we've overflowed, it shouldn't matter what the + * field is; we can't trust its value, so we shouldn't display it. 
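+ * + * Concretely, passing sizeof(struct bpf_sock_ops) - 1 below should make + * btf_dump__dump_type_data() return -E2BIG and emit only the leading + * members it could fully read (here just .op), whatever the truncated + * final field holds.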
+ */ + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, + sizeof(struct bpf_sock_ops) - 1, + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", + { .op = 1, .skb_tcp_flags = 2}); + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, + sizeof(struct bpf_sock_ops) - 1, + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", + { .op = 1, .skb_tcp_flags = 0}); +} + +static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, + "int cpu_number = (int)100", 100); + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, + "static int cpu_profile_flip = (int)2", 2); +} + +static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, + const char *name, const char *expected_val, + void *data, size_t data_sz) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + int ret = 0, cmp; + size_t secsize; + __s32 type_id; + + opts.compact = true; + + type_id = btf__find_by_name(btf, name); + if (!ASSERT_GT(type_id, 0, "find type id")) + return; + + secsize = btf__resolve_size(btf, type_id); + ASSERT_EQ(secsize, 0, "verify section size"); + + str[0] = '\0'; + ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts); + ASSERT_EQ(ret, 0, "unexpected return value"); + + cmp = strcmp(str, expected_val); + ASSERT_EQ(cmp, 0, "ensure expected/actual match"); +} + +static void test_btf_dump_datasec_data(char *str) +{ + struct btf *btf = btf__parse("xdping_kern.o", NULL); + struct btf_dump_opts opts = { .ctx = str }; + char license[4] = "GPL"; + struct btf_dump *d; + + if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) + return; + + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + test_btf_datasec(btf, d, str, "license", + "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", + license, sizeof(license)); +} + void test_btf_dump() { + char str[STRSIZE]; + struct btf_dump_opts opts = { .ctx = str }; + struct btf_dump *d; + struct btf *btf; int i; for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) { @@ -245,4 +831,33 @@ void test_btf_dump() { } if (test__start_subtest("btf_dump: incremental")) test_btf_dump_incremental(); + + btf = libbpf_find_kernel_btf(); + if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) + return; + + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Verify type display for various types. 
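+ * Each subtest below feeds a known C value to btf_dump__dump_type_data() + * using the kernel BTF loaded above and compares the emitted string with + * an expected literal.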
*/ + if (test__start_subtest("btf_dump: int_data")) + test_btf_dump_int_data(btf, d, str); + if (test__start_subtest("btf_dump: float_data")) + test_btf_dump_float_data(btf, d, str); + if (test__start_subtest("btf_dump: char_data")) + test_btf_dump_char_data(btf, d, str); + if (test__start_subtest("btf_dump: typedef_data")) + test_btf_dump_typedef_data(btf, d, str); + if (test__start_subtest("btf_dump: enum_data")) + test_btf_dump_enum_data(btf, d, str); + if (test__start_subtest("btf_dump: struct_data")) + test_btf_dump_struct_data(btf, d, str); + if (test__start_subtest("btf_dump: var_data")) + test_btf_dump_var_data(btf, d, str); + btf_dump__free(d); + btf__free(btf); + + if (test__start_subtest("btf_dump: datasec_data")) + test_btf_dump_datasec_data(str); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 981c251453d9..3d4b2a358d47 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -53,8 +53,8 @@ void test_core_autosize(void) char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; int err, fd = -1, zero = 0; int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); struct test_core_autosize* skel = NULL; - struct bpf_object_load_attr load_attr = {}; struct bpf_program *prog; struct bpf_map *bss_map; struct btf *btf = NULL; @@ -125,9 +125,10 @@ void test_core_autosize(void) fd = -1; /* open and load BPF program with custom BTF as the kernel BTF */ - skel = test_core_autosize__open(); + open_opts.btf_custom_path = btf_file; + skel = test_core_autosize__open_opts(&open_opts); if (!ASSERT_OK_PTR(skel, "skel_open")) - return; + goto cleanup; /* disable handle_signed() for now */ prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); @@ -135,9 +136,7 @@ void test_core_autosize(void) goto cleanup; bpf_program__set_autoload(prog, false); - load_attr.obj = skel->obj; - load_attr.target_btf_path = btf_file; - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(skel->obj); if (!ASSERT_OK(err, "prog_load")) goto cleanup; @@ -204,14 +203,13 @@ void test_core_autosize(void) skel = NULL; /* now re-load with handle_signed() enabled, it should fail loading */ - skel = test_core_autosize__open(); + open_opts.btf_custom_path = btf_file; + skel = test_core_autosize__open_opts(&open_opts); if (!ASSERT_OK_PTR(skel, "skel_open")) - return; + goto cleanup; - load_attr.obj = skel->obj; - load_attr.target_btf_path = btf_file; - err = bpf_object__load_xattr(&load_attr); - if (!ASSERT_ERR(err, "bad_prog_load")) + err = test_core_autosize__load(skel); + if (!ASSERT_ERR(err, "skel_load")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index d02e064c535f..4739b15b2a97 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz) void test_core_reloc(void) { const size_t mmap_sz = roundup_page(sizeof(struct data)); - struct bpf_object_load_attr load_attr = {}; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; int err, i, equal; @@ -846,9 +846,16 @@ void test_core_reloc(void) continue; } - obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); + if (test_case->btf_src_file) { + err 
= access(test_case->btf_src_file, R_OK); + if (!ASSERT_OK(err, "btf_src_file")) + goto cleanup; + } + + open_opts.btf_custom_path = test_case->btf_src_file; + obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts); if (!ASSERT_OK_PTR(obj, "obj_open")) - continue; + goto cleanup; probe_name = "raw_tracepoint/sys_enter"; tp_name = "sys_enter"; @@ -862,17 +869,7 @@ void test_core_reloc(void) "prog '%s' not found\n", probe_name)) goto cleanup; - - if (test_case->btf_src_file) { - err = access(test_case->btf_src_file, R_OK); - if (!ASSERT_OK(err, "btf_src_file")) - goto cleanup; - } - - load_attr.obj = obj; - load_attr.log_level = 0; - load_attr.target_btf_path = test_case->btf_src_file; - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { if (!test_case->fails) ASSERT_OK(err, "obj_load"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c new file mode 100644 index 000000000000..02a465f36d59 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "get_func_ip_test.skel.h" + +void test_get_func_ip_test(void) +{ + struct get_func_ip_test *skel = NULL; + __u32 duration = 0, retval; + int err, prog_fd; + + skel = get_func_ip_test__open(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) + return; + + /* test6 and test7 are x86_64 specific because of the instruction + * offsets; disable them for all other archs + */ +#ifndef __x86_64__ + bpf_program__set_autoload(skel->progs.test6, false); + bpf_program__set_autoload(skel->progs.test7, false); +#endif + + err = get_func_ip_test__load(skel); + if (!ASSERT_OK(err, "get_func_ip_test__load")) + goto cleanup; + + err = get_func_ip_test__attach(skel); + if (!ASSERT_OK(err, "get_func_ip_test__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + + prog_fd = bpf_program__fd(skel->progs.test5); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + + ASSERT_OK(err, "test_run"); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); + ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); + ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); + ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); + ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); +#ifdef __x86_64__ + ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); + ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); +#endif + +cleanup: + get_func_ip_test__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index fcf54b3a1dd0..d4b953ae3407 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -125,6 +125,10 @@ void test_pinning(void) if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) goto out; + /* get pinning path */ + if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path")) + goto out; + /* set pinning path of other map and re-pin all */ map = bpf_object__find_map_by_name(obj, "nopinmap"); if (CHECK(!map, "find map", "NULL map")) @@ -134,6 +138,11 @@ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) goto out; + /* get pinning path after set */ + if 
(!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath, + "get pin path after set")) + goto out; + /* should only pin the one unpinned map */ err = bpf_object__pin_maps(obj, NULL); if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 515229f24a93..a9f1bf9d5dff 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd) errno = 0; value = s; err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); - if (!err || errno != EOPNOTSUPP) - FAIL_ERRNO("map_update: expected EOPNOTSUPP"); - + if (sotype == SOCK_STREAM) { + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); + } else if (err) + FAIL_ERRNO("map_update: expected success"); xclose(s); } @@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode) } } +static int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +{ + u64 value; + u32 key; + int err; + + key = 0; + value = fd1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + return err; + + key = 1; + value = fd2; + return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); +} + static void redir_to_connected(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { @@ -928,7 +947,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, unsigned int pass; socklen_t len; int err, n; - u64 value; u32 key; char b; @@ -965,15 +983,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (p1 < 0) goto close_cli1; - key = 0; - value = p0; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); - if (err) - goto close_peer1; - - key = 1; - value = p1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_mapfd, p0, p1); if (err) goto close_peer1; @@ -1061,7 +1071,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, int s, c, p, err, n; unsigned int drop; socklen_t len; - u64 value; u32 key; zero_verdict_count(verd_mapfd); @@ -1086,15 +1095,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, if (p < 0) goto close_cli; - key = 0; - value = s; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); - if (err) - goto close_peer; - - key = 1; - value = p; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_mapfd, s, p); if (err) goto close_peer; @@ -1346,7 +1347,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, int s1, s2, c, err; unsigned int drop; socklen_t len; - u64 value; u32 key; zero_verdict_count(verd_map); @@ -1360,16 +1360,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, if (s2 < 0) goto close_srv1; - key = 0; - value = s1; - err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_map, s1, s2); if (err) goto close_srv2; - key = 1; - value = s2; - err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); - /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ len = sizeof(addr); err = xgetsockname(s2, sockaddr(&addr), &len); @@ -1441,6 +1435,8 @@ static const char *family_str(sa_family_t family) return "IPv4"; case AF_INET6: return "IPv6"; + case AF_UNIX: + return "Unix"; 
default: return "unknown"; } @@ -1563,6 +1559,99 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } +static void unix_redir_to_connected(int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + goto close0; + c1 = sfd[0], p1 = sfd[1]; + + err = add_to_sockmap(sock_mapfd, p0, p1); + if (err) + goto close; + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + +again: + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; + FAIL_ERRNO("%s: read", log_prefix); + } + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close: + xclose(c1); + xclose(p1); +close0: + xclose(c0); + xclose(p0); +} + +static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int sotype) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(AF_UNIX); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + unix_skb_redir_to_connected(skel, map, sotype); +} + static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -1603,33 +1692,27 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } -static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static int udp_socketpair(int family, int *s, int *c) { - const char *log_prefix = redir_mode_str(mode); struct sockaddr_storage addr; - int c0, c1, p0, p1; - unsigned int pass; - int retries = 100; socklen_t len; - int err, n; - u64 value; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int p0, c0; + int err; - p0 = socket_loopback(family, sotype | SOCK_NONBLOCK); + p0 = socket_loopback(family, SOCK_DGRAM | SOCK_NONBLOCK); if (p0 < 0) - return; + return p0; + len = sizeof(addr); err = xgetsockname(p0, sockaddr(&addr), &len); if (err) goto close_peer0; - c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0); - if (c0 < 0) + c0 = xsocket(family, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (c0 < 0) { + err = c0; goto close_peer0; + } err = xconnect(c0, sockaddr(&addr), len); if (err) goto close_cli0; @@ 
-1640,35 +1723,131 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli0; - p1 = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (p1 < 0) - goto close_cli0; - err = xgetsockname(p1, sockaddr(&addr), &len); + *s = p0; + *c = c0; + return 0; + +close_cli0: + xclose(c0); +close_peer0: + xclose(p0); + return err; +} + +static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, + enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + err = udp_socketpair(family, &p0, &c0); + if (err) + return; + err = udp_socketpair(family, &p1, &c1); if (err) goto close_cli0; - c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0); - if (c1 < 0) - goto close_peer1; - err = xconnect(c1, sockaddr(&addr), len); + err = add_to_sockmap(sock_mapfd, p0, p1); if (err) goto close_cli1; - err = xgetsockname(c1, sockaddr(&addr), &len); - if (err) + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) goto close_cli1; - err = xconnect(p1, sockaddr(&addr), len); + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) goto close_cli1; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); - key = 0; - value = p0; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); +again: + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; + FAIL_ERRNO("%s: read", log_prefix); + } + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close_cli1: + xclose(c1); + xclose(p1); +close_cli0: + xclose(c0); + xclose(p0); +} + +static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); if (err) - goto close_cli1; + return; - key = 1; - value = p1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + skel->bss->test_ingress = false; + udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + udp_skb_redir_to_connected(skel, map, family); +} + +static void udp_unix_redir_to_connected(int family, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + err = udp_socketpair(family, &p1, &c1); + if (err) + goto close; + + err = add_to_sockmap(sock_mapfd, p0, p1); if 
(err) goto close_cli1; @@ -1699,16 +1878,89 @@ again: close_cli1: xclose(c1); -close_peer1: + xclose(p1); +close: + xclose(c0); + xclose(p0); +} + +static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void unix_udp_redir_to_connected(int family, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + err = udp_socketpair(family, &p0, &c0); + if (err) + return; + + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + goto close_cli0; + c1 = sfd[0], p1 = sfd[1]; + + err = add_to_sockmap(sock_mapfd, p0, p1); + if (err) + goto close; + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); + if (n < 0) + FAIL_ERRNO("%s: read", log_prefix); + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close: + xclose(c1); xclose(p1); close_cli0: xclose(c0); -close_peer0: xclose(p0); + } -static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) +static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) { int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); int verdict_map = bpf_map__fd(skel->maps.verdict_map); @@ -1720,17 +1972,15 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, return; skel->bss->test_ingress = false; - udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_EGRESS); + unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); skel->bss->test_ingress = true; - udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_INGRESS); + unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) +static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) { const char *family_name, *map_name; char s[MAX_TEST_NAME]; @@ -1740,7 +1990,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); if (!test__start_subtest(s)) return; - udp_skb_redir_to_connected(skel, map, family); + udp_unix_skb_redir_to_connected(skel, map, family); + unix_udp_skb_redir_to_connected(skel, map, family); } static void run_tests(struct test_sockmap_listen *skel, struct 
bpf_map *map, @@ -1752,6 +2003,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_DGRAM); test_udp_redir(skel, map, family); + test_udp_unix_redir(skel, map, family); } void test_sockmap_listen(void) @@ -1767,10 +2019,12 @@ void test_sockmap_listen(void) skel->bss->test_sockmap = true; run_tests(skel, skel->maps.sock_map, AF_INET); run_tests(skel, skel->maps.sock_map, AF_INET6); + test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); + test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 5703c918812b..932e4ee3f97c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -13,15 +13,16 @@ #define _GNU_SOURCE #include <arpa/inet.h> +#include <linux/if.h> +#include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> -#include <linux/if_tun.h> -#include <linux/if.h> #include <sched.h> #include <stdbool.h> #include <stdio.h> -#include <sys/stat.h> #include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> #include "test_progs.h" #include "network_helpers.h" @@ -389,11 +390,21 @@ done: close(client_fd); } -static int test_ping(int family, const char *addr) +static char *ping_command(int family) { - const char *ping = family == AF_INET6 ? "ping6" : "ping"; + if (family == AF_INET6) { + /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. 
*/ + if (!system("which ping6 >/dev/null 2>&1")) + return "ping6"; + else + return "ping -6"; + } + return "ping"; +} - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); +static int test_ping(int family, const char *addr) +{ + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); return 0; fail: return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c new file mode 100644 index 000000000000..25f40e1b9967 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "timer.skel.h" + +static int timer(struct timer *timer_skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + + err = timer__attach(timer_skel); + if (!ASSERT_OK(err, "timer_attach")) + return err; + + ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1"); + ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); + + prog_fd = bpf_program__fd(timer_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + timer__detach(timer_skel); + + usleep(50); /* 10 usecs should be enough, but give it extra */ + /* check that timer_cb2() was executed 10+10 times (once per counter tick in each hashmap) */ + ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2"); + ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2"); + + /* check that timer_cb1() was executed twice (once via the array timer, once via the lru timer) */ + ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data"); + + /* check that there were no errors in timer execution */ + ASSERT_EQ(timer_skel->bss->err, 0, "err"); + + /* check that code paths completed */ + ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + + return 0; +} + +void test_timer(void) +{ + struct timer *timer_skel = NULL; + int err; + + timer_skel = timer__open_and_load(); + if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) + goto cleanup; + + err = timer(timer_skel); + ASSERT_OK(err, "timer"); +cleanup: + timer__destroy(timer_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c new file mode 100644 index 000000000000..f5acbcbe33a4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "timer_mim.skel.h" +#include "timer_mim_reject.skel.h" + +static int timer_mim(struct timer_mim *timer_skel) +{ + __u32 duration = 0, retval; + __u64 cnt1, cnt2; + int err, prog_fd, key1 = 1; + + err = timer_mim__attach(timer_skel); + if (!ASSERT_OK(err, "timer_attach")) + return err; + + prog_fd = bpf_program__fd(timer_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + timer_mim__detach(timer_skel); + + /* check that timer_cb[12] are incrementing 'cnt' */ + cnt1 = READ_ONCE(timer_skel->bss->cnt); + usleep(200); /* 200 times the 1 usec re-arm interval */ + cnt2 = READ_ONCE(timer_skel->bss->cnt); + ASSERT_GT(cnt2, cnt1, "cnt"); + + ASSERT_EQ(timer_skel->bss->err, 0, "err"); + /* check that code paths completed */ + ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok"); + + close(bpf_map__fd(timer_skel->maps.inner_htab)); + err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr),
&key1); + ASSERT_EQ(err, 0, "delete inner map"); + + /* check that timer_cb[12] are no longer running */ + cnt1 = READ_ONCE(timer_skel->bss->cnt); + usleep(200); + cnt2 = READ_ONCE(timer_skel->bss->cnt); + ASSERT_EQ(cnt2, cnt1, "cnt"); + + return 0; +} + +void test_timer_mim(void) +{ + struct timer_mim_reject *timer_reject_skel = NULL; + libbpf_print_fn_t old_print_fn = NULL; + struct timer_mim *timer_skel = NULL; + int err; + + old_print_fn = libbpf_set_print(NULL); + timer_reject_skel = timer_mim_reject__open_and_load(); + libbpf_set_print(old_print_fn); + if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load")) + goto cleanup; + + timer_skel = timer_mim__open_and_load(); + if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) + goto cleanup; + + err = timer_mim(timer_skel); + ASSERT_OK(err, "timer_mim"); +cleanup: + timer_mim__destroy(timer_skel); + timer_mim_reject__destroy(timer_reject_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c new file mode 100644 index 000000000000..ab4952b9fb1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "test_xdp_context_test_run.skel.h" + +void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, + __u32 data_meta, __u32 data, __u32 data_end, + __u32 ingress_ifindex, __u32 rx_queue_index, + __u32 egress_ifindex) +{ + struct xdp_md ctx = { + .data = data, + .data_end = data_end, + .data_meta = data_meta, + .ingress_ifindex = ingress_ifindex, + .rx_queue_index = rx_queue_index, + .egress_ifindex = egress_ifindex, + }; + int err; + + opts.ctx_in = &ctx; + opts.ctx_size_in = sizeof(ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, EINVAL, "errno-EINVAL"); + ASSERT_ERR(err, "bpf_prog_test_run"); +} + +void test_xdp_context_test_run(void) +{ + struct test_xdp_context_test_run *skel = NULL; + char data[sizeof(pkt_v4) + sizeof(__u32)]; + char bad_ctx[sizeof(struct xdp_md) + 1]; + struct xdp_md ctx_in, ctx_out; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .ctx_out = &ctx_out, + .ctx_size_out = sizeof(ctx_out), + .repeat = 1, + ); + int err, prog_fd; + + skel = test_xdp_context_test_run__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + prog_fd = bpf_program__fd(skel->progs.xdp_context); + + /* Data past the end of the kernel's struct xdp_md must be 0 */ + bad_ctx[sizeof(bad_ctx) - 1] = 1; + opts.ctx_in = bad_ctx; + opts.ctx_size_in = sizeof(bad_ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, E2BIG, "extradata-errno"); + ASSERT_ERR(err, "bpf_prog_test_run(extradata)"); + + *(__u32 *)data = XDP_PASS; + *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4; + opts.ctx_in = &ctx_in; + opts.ctx_size_in = sizeof(ctx_in); + memset(&ctx_in, 0, sizeof(ctx_in)); + ctx_in.data_meta = 0; + ctx_in.data = sizeof(__u32); + ctx_in.data_end = ctx_in.data + sizeof(pkt_v4); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "bpf_prog_test_run(valid)"); + ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval"); + ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize"); + ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize"); + ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta"); + ASSERT_EQ(ctx_out.data, 0, "valid-data"); + ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), 
"valid-dataend"); + + /* Meta data's size must be a multiple of 4 */ + test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0); + + /* data_meta must reference the start of data */ + test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), + 0, 0, 0); + + /* Meta data must be 32 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + + /* Total size of data must match data_end - data_meta */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) - 1, 0, 0, 0); + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) + 1, 0, 0, 0); + + /* RX queue cannot be specified without specifying an ingress */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 1, 0); + + /* Interface 1 is always the loopback interface which always has only + * one RX queue (index 0). This makes index 1 an invalid rx queue index + * for interface 1. + */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 1, 1, 0); + + /* The egress cannot be specified */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 0, 1); + + test_xdp_context_test_run__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 0176573fe4e7..8755effd80b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,64 +7,53 @@ #define IFINDEX_LO 1 -void test_xdp_with_cpumap_helpers(void) +void test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; + __u32 len = sizeof(info); struct bpf_cpumap_val val = { .qsize = 192, }; - __u32 duration = 0, idx = 0; - __u32 len = sizeof(info); int err, prog_fd, map_fd; + __u32 idx = 0; skel = test_xdp_with_cpumap_helpers__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_xdp_with_cpumap_helpers__open_and_load"); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) return; - } - /* can not attach program with cpumaps that allow programs - * as xdp generic - */ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP", - "should have failed\n"); + if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) + goto out_close; + + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + ASSERT_OK(err, "XDP program detach"); prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) goto out_close; val.bpf_prog.fd = prog_fd; err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err, "Add program to cpumap entry", "err %d errno %d\n", - err, errno); + ASSERT_OK(err, "Add program to cpumap entry"); err = bpf_map_lookup_elem(map_fd, &idx, &val); - CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno); - CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry", - "expected %u read %u\n", info.id, val.bpf_prog.id); + ASSERT_OK(err, "Read cpumap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* can not attach BPF_XDP_CPUMAP program to a device */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); - CHECK(err 
== 0, "Attach of BPF_XDP_CPUMAP program", - "should have failed\n"); + if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) + bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); val.qsize = 192; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry", - "should have failed\n"); + ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); out_close: test_xdp_with_cpumap_helpers__destroy(skel); } - -void test_xdp_cpumap_attach(void) -{ - if (test__start_subtest("cpumap_with_progs")) - test_xdp_with_cpumap_helpers(); -} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 88ef3ec8ac4c..c72af030ff10 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void) .ifindex = IFINDEX_LO, }; __u32 len = sizeof(info); - __u32 duration = 0, idx = 0; int err, dm_fd, map_fd; + __u32 idx = 0; skel = test_xdp_with_devmap_helpers__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_xdp_with_devmap_helpers__open_and_load"); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) return; - } - /* can not attach program with DEVMAPs that allow programs - * as xdp generic - */ dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Generic attach of program with 8-byte devmap", - "should have failed\n"); + if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) + goto out_close; + + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + ASSERT_OK(err, "XDP program detach"); dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); map_fd = bpf_map__fd(skel->maps.dm_ports); err = bpf_obj_get_info_by_fd(dm_fd, &info, &len); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) goto out_close; val.bpf_prog.fd = dm_fd; err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err, "Add program to devmap entry", - "err %d errno %d\n", err, errno); + ASSERT_OK(err, "Add program to devmap entry"); err = bpf_map_lookup_elem(map_fd, &idx, &val); - CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno); - CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry", - "expected %u read %u\n", info.id, val.bpf_prog.id); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); /* can not attach BPF_XDP_DEVMAP program to a device */ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program", - "should have failed\n"); + if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) + bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); val.ifindex = 1; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry", - "should have failed\n"); + ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry"); out_close: test_xdp_with_devmap_helpers__destroy(skel); @@ -68,12 +63,10 @@ out_close: void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; - __u32 duration = 0; skel = test_xdp_devmap_helpers__open_and_load(); - if (CHECK(skel, 
- "Load of XDP program accessing egress ifindex without attach type", - "should have failed\n")) { + if (!ASSERT_EQ(skel, NULL, + "Load of XDP program accessing egress ifindex without attach type")) { test_xdp_devmap_helpers__destroy(skel); } } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c new file mode 100644 index 000000000000..b77adfd55d73 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define bpf_tcp_sk(skc) ({ \ + struct sock_common *_skc = skc; \ + sk = NULL; \ + tp = NULL; \ + if (_skc) { \ + tp = bpf_skc_to_tcp_sock(_skc); \ + sk = (struct sock *)tp; \ + } \ + tp; \ +}) + +unsigned short reuse_listen_hport = 0; +unsigned short listen_hport = 0; +char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; +char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; +bool random_retry = false; + +static bool tcp_cc_eq(const char *a, const char *b) +{ + int i; + + for (i = 0; i < TCP_CA_NAME_MAX; i++) { + if (a[i] != b[i]) + return false; + if (!a[i]) + break; + } + + return true; +} + +SEC("iter/tcp") +int change_tcp_cc(struct bpf_iter__tcp *ctx) +{ + char cur_cc[TCP_CA_NAME_MAX]; + struct tcp_sock *tp; + struct sock *sk; + int ret; + + if (!bpf_tcp_sk(ctx->sk_common)) + return 0; + + if (sk->sk_family != AF_INET6 || + (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_ESTABLISHED) || + (sk->sk_num != reuse_listen_hport && + sk->sk_num != listen_hport && + bpf_ntohs(sk->sk_dport) != listen_hport)) + return 0; + + if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION, + cur_cc, sizeof(cur_cc))) + return 0; + + if (!tcp_cc_eq(cur_cc, cubic_cc)) + return 0; + + if (random_retry && bpf_get_prandom_u32() % 4 == 1) + return 1; + + bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc)); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 01378911252b..3af0998a0623 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -5,6 +5,10 @@ #define AF_INET 2 #define AF_INET6 10 +#define SOL_TCP 6 +#define TCP_CONGESTION 13 +#define TCP_CA_NAME_MAX 16 + #define ICSK_TIME_RETRANS 1 #define ICSK_TIME_PROBE0 3 #define ICSK_TIME_LOSS_PROBE 5 @@ -32,6 +36,8 @@ #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr +#define sk_num __sk_common.skc_num +#define sk_dport __sk_common.skc_dport #define sk_family __sk_common.skc_family #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c new file mode 100644 index 000000000000..a587aeca5ae0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +extern const void bpf_fentry_test1 __ksym; +extern const void bpf_fentry_test2 __ksym; +extern const void bpf_fentry_test3 __ksym; +extern const void bpf_fentry_test4 __ksym; +extern const void bpf_modify_return_test __ksym; 
+extern const void bpf_fentry_test6 __ksym; +extern const void bpf_fentry_test7 __ksym; + +__u64 test1_result = 0; +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test1_result = (const void *) addr == &bpf_fentry_test1; + return 0; +} + +__u64 test2_result = 0; +SEC("fexit/bpf_fentry_test2") +int BPF_PROG(test2, int a) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test2_result = (const void *) addr == &bpf_fentry_test2; + return 0; +} + +__u64 test3_result = 0; +SEC("kprobe/bpf_fentry_test3") +int test3(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test3_result = (const void *) addr == &bpf_fentry_test3; + return 0; +} + +__u64 test4_result = 0; +SEC("kretprobe/bpf_fentry_test4") +int BPF_KRETPROBE(test4) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test4_result = (const void *) addr == &bpf_fentry_test4; + return 0; +} + +__u64 test5_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(test5, int a, int *b, int ret) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test5_result = (const void *) addr == &bpf_modify_return_test; + return ret; +} + +__u64 test6_result = 0; +SEC("kprobe/bpf_fentry_test6+0x5") +int test6(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test6_result = (const void *) addr == &bpf_fentry_test6 + 5; + return 0; +} + +__u64 test7_result = 0; +SEC("kprobe/bpf_fentry_test7+5") +int test7(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test7_result = (const void *) addr == &bpf_fentry_test7 + 5; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index d071adf178bd..43649bce4c54 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -13,21 +13,21 @@ struct { __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); __type(key, struct bpf_cgroup_storage_key); - __type(value, struct percpu_net_cnt); + __type(value, union percpu_net_cnt); } percpu_netcnt SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); __type(key, struct bpf_cgroup_storage_key); - __type(value, struct net_cnt); + __type(value, union net_cnt); } netcnt SEC(".maps"); SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { - struct percpu_net_cnt *percpu_cnt; + union percpu_net_cnt *percpu_cnt; char fmt[] = "%d %llu %llu\n"; - struct net_cnt *cnt; + union net_cnt *cnt; __u64 ts, dt; int ret; diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c new file mode 100644 index 000000000000..703c08e06442 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Isovalent, Inc. 
*/ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, int); + __uint(max_entries, 4); +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 0); /* This will make map creation fail */ + __uint(key_size, sizeof(__u32)); + __array(values, struct inner); +} mim SEC(".maps"); + +SEC("xdp") +int xdp_noop0(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 84cd63259554..a0e7762b1e5a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb) static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) { - char buf[sizeof(struct v6hdr)]; struct gre_hdr greh; struct udphdr udph; int olen = len; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c new file mode 100644 index 000000000000..d7b88cd05afd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_context(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + __u32 *metadata = (void *)(long)xdp->data_meta; + __u32 ret; + + if (metadata + 1 > data) + return XDP_ABORTED; + ret = *metadata; + if (bpf_xdp_adjust_meta(xdp, 4)) + return XDP_ABORTED; + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c new file mode 100644 index 000000000000..5f5309791649 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int counter; + struct bpf_timer timer; + struct bpf_spin_lock lock; /* unused */ +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap_malloc SEC(".maps"); + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 4); + __type(key, int); + __type(value, struct elem); +} lru SEC(".maps"); + +__u64 bss_data; +__u64 err; +__u64 ok; +__u64 callback_check = 52; +__u64 callback2_check = 52; + +#define ARRAY 1 +#define HTAB 2 +#define HTAB_MALLOC 3 +#define LRU 4 + +/* callback for array and lru timers */ +static int timer_cb1(void *map, int *key, struct bpf_timer *timer) +{ + /* increment bss variable twice. + * Once via array timer callback and once via lru timer callback + */ + bss_data += 5; + + /* *key == ARRAY (1) - the callback was called for the array timer. + * *key == LRU (4) - the callback was called from the lru timer.
+ */ + if (*key == ARRAY) { + struct bpf_timer *lru_timer; + int lru_key = LRU; + + /* rearm array timer to be called again in ~35 seconds */ + if (bpf_timer_start(timer, 1ull << 35, 0) != 0) + err |= 1; + + lru_timer = bpf_map_lookup_elem(&lru, &lru_key); + if (!lru_timer) + return 0; + bpf_timer_set_callback(lru_timer, timer_cb1); + if (bpf_timer_start(lru_timer, 0, 0) != 0) + err |= 2; + } else if (*key == LRU) { + int lru_key, i; + + for (i = LRU + 1; + i <= 100 /* for current LRU eviction algorithm this number + * should be larger than ~ lru->max_entries * 2 + */; + i++) { + struct elem init = {}; + + /* lru_key cannot be used as loop induction variable + * otherwise the loop will be unbounded. + */ + lru_key = i; + + /* add more elements into lru map to push out current + * element and force deletion of this timer + */ + bpf_map_update_elem(map, &lru_key, &init, 0); + /* look it up to bump it into active list */ + bpf_map_lookup_elem(map, &lru_key); + + /* keep adding until *key changes underneath, + * which means that key/timer memory was reused + */ + if (*key != LRU) + break; + } + + /* check that the timer was removed */ + if (bpf_timer_cancel(timer) != -EINVAL) + err |= 4; + ok |= 1; + } + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct bpf_timer *arr_timer, *lru_timer; + struct elem init = {}; + int lru_key = LRU; + int array_key = ARRAY; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); + + bpf_map_update_elem(&lru, &lru_key, &init, 0); + lru_timer = bpf_map_lookup_elem(&lru, &lru_key); + if (!lru_timer) + return 0; + bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC); + + bpf_timer_set_callback(arr_timer, timer_cb1); + bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0); + + /* init more timers to check that array destruction + * doesn't leak timer memory. + */ + array_key = 0; + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); + return 0; +} + +/* callback for prealloc and non-prealloc hashtab timers */ +static int timer_cb2(void *map, int *key, struct hmap_elem *val) +{ + if (*key == HTAB) + callback_check--; + else + callback2_check--; + if (val->counter > 0 && --val->counter) { + /* re-arm the timer to execute again after 1 usec */ + bpf_timer_start(&val->timer, 1000, 0); + } else if (*key == HTAB) { + struct bpf_timer *arr_timer; + int array_key = ARRAY; + + /* cancel arr_timer otherwise bpf_fentry_test1 prog + * will stay alive forever. + */ + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + if (bpf_timer_cancel(arr_timer) != 1) + /* bpf_timer_cancel should return 1 to indicate + * that arr_timer was active at this time + */ + err |= 8; + + /* try to cancel ourselves. It shouldn't deadlock. */ + if (bpf_timer_cancel(&val->timer) != -EDEADLK) + err |= 16; + + /* delete this key and this timer anyway. + * It shouldn't deadlock either. + */ + bpf_map_delete_elem(map, key); + + /* in preallocated hashmap both 'key' and 'val' could have been + * reused to store another map element (like in LRU above), + * but in controlled test environment the below test works. + * It's not a use-after-free. The memory is owned by the map. + */ + if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL) + err |= 32; + ok |= 2; + } else { + if (*key != HTAB_MALLOC) + err |= 64; + + /* try to cancel ourselves. It shouldn't deadlock.
*/ + if (bpf_timer_cancel(&val->timer) != -EDEADLK) + err |= 128; + + /* delete this key and this timer anyway. + * It shouldn't deadlock either. + */ + bpf_map_delete_elem(map, key); + + /* in non-preallocated hashmap both 'key' and 'val' are RCU + * protected and still valid though this element was deleted + * from the map. Arm this timer for ~35 seconds. When callback + * finishes the call_rcu will invoke: + * htab_elem_free_rcu + * check_and_free_timer + * bpf_timer_cancel_and_free + * to cancel this 35 second sleep and delete the timer for real. + */ + if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0) + err |= 256; + ok |= 4; + } + return 0; +} + +int bpf_timer_test(void) +{ + struct hmap_elem *val; + int key = HTAB, key_malloc = HTAB_MALLOC; + + val = bpf_map_lookup_elem(&hmap, &key); + if (val) { + if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0) + err |= 512; + bpf_timer_set_callback(&val->timer, timer_cb2); + bpf_timer_start(&val->timer, 1000, 0); + } + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) { + if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0) + err |= 1024; + bpf_timer_set_callback(&val->timer, timer_cb2); + bpf_timer_start(&val->timer, 1000, 0); + } + return 0; +} + +SEC("fentry/bpf_fentry_test2") +int BPF_PROG(test2, int a, int b) +{ + struct hmap_elem init = {}, *val; + int key = HTAB, key_malloc = HTAB_MALLOC; + + init.counter = 10; /* number of times to trigger timer_cb2 */ + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + /* update the same key to free the timer */ + bpf_map_update_elem(&hmap, &key, &init, 0); + + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + /* update the same key to free the timer */ + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + + /* init more timers to check that htab operations + * don't leak timer memory. 
+ */ + key = 0; + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + bpf_map_delete_elem(&hmap, &key); + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + + /* and with non-prealloc htab */ + key_malloc = 0; + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + bpf_map_delete_elem(&hmap_malloc, &key_malloc); + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + + return bpf_timer_test(); +} diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c new file mode 100644 index 000000000000..2fee7ab105ef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_mim.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int pad; /* unused */ + struct bpf_timer timer; +}; + +struct inner_map { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, int); + __type(value, struct hmap_elem); +} inner_htab SEC(".maps"); + +#define ARRAY_KEY 1 +#define HASH_KEY 1234 + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [ARRAY_KEY] = &inner_htab }, +}; + +__u64 err; +__u64 ok; +__u64 cnt; + +static int timer_cb1(void *map, int *key, struct hmap_elem *val); + +static int timer_cb2(void *map, int *key, struct hmap_elem *val) +{ + cnt++; + bpf_timer_set_callback(&val->timer, timer_cb1); + if (bpf_timer_start(&val->timer, 1000, 0)) + err |= 1; + ok |= 1; + return 0; +} + +/* callback for inner hash map */ +static int timer_cb1(void *map, int *key, struct hmap_elem *val) +{ + cnt++; + bpf_timer_set_callback(&val->timer, timer_cb2); + if (bpf_timer_start(&val->timer, 1000, 0)) + err |= 2; + /* Do a lookup to make sure 'map' and 'key' pointers are correct */ + bpf_map_lookup_elem(map, key); + ok |= 2; + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct hmap_elem init = {}; + struct bpf_map *inner_map; + struct hmap_elem *val; + int array_key = ARRAY_KEY; + int hash_key = HASH_KEY; + + inner_map = bpf_map_lookup_elem(&outer_arr, &array_key); + if (!inner_map) + return 0; + + bpf_map_update_elem(inner_map, &hash_key, &init, 0); + val = bpf_map_lookup_elem(inner_map, &hash_key); + if (!val) + return 0; + + bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC); + if (bpf_timer_set_callback(&val->timer, timer_cb1)) + err |= 4; + if (bpf_timer_start(&val->timer, 0, 0)) + err |= 8; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c new file mode 100644 index 000000000000..5d648e3d8a41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ 
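+/* Negative counterpart to timer_mim.c: loading this program is expected
+ * to fail, because the timer below lives in an element of inner_map while
+ * bpf_timer_init() is passed inner_map2, a lookup of a different
+ * outer-array slot, and the verifier must reject the map mismatch.
+ */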
+#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int pad; /* unused */ + struct bpf_timer timer; +}; + +struct inner_map { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, int); + __type(value, struct hmap_elem); +} inner_htab SEC(".maps"); + +#define ARRAY_KEY 1 +#define ARRAY_KEY2 2 +#define HASH_KEY 1234 + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [ARRAY_KEY] = &inner_htab }, +}; + +__u64 err; +__u64 ok; +__u64 cnt; + +/* callback for inner hash map */ +static int timer_cb(void *map, int *key, struct hmap_elem *val) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct hmap_elem init = {}; + struct bpf_map *inner_map, *inner_map2; + struct hmap_elem *val; + int array_key = ARRAY_KEY; + int array_key2 = ARRAY_KEY2; + int hash_key = HASH_KEY; + + inner_map = bpf_map_lookup_elem(&outer_arr, &array_key); + if (!inner_map) + return 0; + + inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2); + if (!inner_map2) + return 0; + bpf_map_update_elem(inner_map, &hash_key, &init, 0); + val = bpf_map_lookup_elem(inner_map, &hash_key); + if (!val) + return 0; + + bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC); + if (bpf_timer_set_callback(&val->timer, timer_cb)) + err |= 4; + if (bpf_timer_start(&val->timer, 0, 0)) + err |= 8; + return 0; +} diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py new file mode 100755 index 000000000000..be54b7335a76 --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -0,0 +1,586 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# +# Copyright (C) 2021 Isovalent, Inc. + +import argparse +import re +import os, sys + +LINUX_ROOT = os.path.abspath(os.path.join(__file__, + os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) +BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') +retval = 0 + +class BlockParser(object): + """ + A parser for extracting a set of values from blocks such as enums. + @reader: a pointer to the open file to parse + """ + def __init__(self, reader): + self.reader = reader + + def search_block(self, start_marker): + """ + Search for a given structure in a file. + @start_marker: regex marking the beginning of a structure to parse + """ + offset = self.reader.tell() + array_start = re.search(start_marker, self.reader.read()) + if array_start is None: + raise Exception('Failed to find start of block') + self.reader.seek(offset + array_start.start()) + + def parse(self, pattern, end_marker): + """ + Parse a block and return a set of values. Values to extract must be + on separate lines in the file. + @pattern: pattern used to identify the values to extract + @end_marker: regex marking the end of the block to parse + """ + entries = set() + while True: + line = self.reader.readline() + if not line or re.match(end_marker, line): + break + capture = pattern.search(line) + if capture and pattern.groups >= 1: + entries.add(capture.group(1)) + return entries + +class ArrayParser(BlockParser): + """ + A parser for extracting dictionaries of values from some BPF-related arrays.
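+ Used for the arrays that map BPF enum members to printable names, such as prog_type_name or map_type_name in bpftool's sources.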
+ @reader: a pointer to the open file to parse + @array_name: name of the array to parse + """ + end_marker = re.compile('^};') + + def __init__(self, reader, array_name): + self.array_name = array_name + self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + super().__init__(reader) + + def search_block(self): + """ + Search for the given array in a file. + """ + super().search_block(self.start_marker) + + def parse(self): + """ + Parse a block and return data as a dictionary. Items to extract must be + on separate lines in the file. + """ + pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') + entries = {} + while True: + line = self.reader.readline() + if line == '' or re.match(self.end_marker, line): + break + capture = pattern.search(line) + if capture: + entries[capture.group(1)] = capture.group(2) + return entries + +class InlineListParser(BlockParser): + """ + A parser for extracting a set of values from inline lists. + """ + def parse(self, pattern, end_marker): + """ + Parse a block and return a set of values. Multiple values to extract + can be on the same line in the file. + @pattern: pattern used to identify the values to extract + @end_marker: regex marking the end of the block to parse + """ + entries = set() + while True: + line = self.reader.readline() + if not line: + break + entries.update(pattern.findall(line)) + if re.search(end_marker, line): + break + return entries + +class FileExtractor(object): + """ + A generic reader for extracting data from a given file. This class contains + several helper methods that wrap around parser objects to extract values + from different structures. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def __init__(self): + self.reader = open(self.filename, 'r') + + def close(self): + """ + Close the file used by the parser. + """ + self.reader.close() + + def reset_read(self): + """ + Reset the file position indicator for this parser. This is useful when + parsing several structures in the file without respecting the order in + which those structures appear in the file.
+ """ + self.reader.seek(0) + + def get_types_from_array(self, array_name): + """ + Search for and parse an array associating names to BPF_* enum members, + for example: + + const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + }; + + Return a dictionary with the enum member names as keys and the + associated names as values, for example: + + {'BPF_PROG_TYPE_UNSPEC': 'unspec', + 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', + 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + + @array_name: name of the array to parse + """ + array_parser = ArrayParser(self.reader, array_name) + array_parser.search_block() + return array_parser.parse() + + def get_enum(self, enum_name): + """ + Search for and parse an enum containing BPF_* members, for example: + + enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + }; + + Return a set containing all member names, for example: + + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} + + @enum_name: name of the enum to parse + """ + start_marker = re.compile(f'enum {enum_name} {{\n') + pattern = re.compile('^\s*(BPF_\w+),?$') + end_marker = re.compile('^};') + parser = BlockParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) + + def __get_description_list(self, start_marker, pattern, end_marker): + parser = InlineListParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) + + def get_rst_list(self, block_name): + """ + Search for and parse a list of type names from RST documentation, for + example: + + | *TYPE* := { + | **socket** | **kprobe** | + | **kretprobe** + | } + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the blog to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'\*{block_name}\* := {{') + pattern = re.compile('\*\*([\w/-]+)\*\*') + end_marker = re.compile('}\n') + return self.__get_description_list(start_marker, pattern, end_marker) + + def get_help_list(self, block_name): + """ + Search for and parse a list of type names from a help message in + bpftool, for example: + + " TYPE := { socket | kprobe |\\n" + " kretprobe }\\n" + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the blog to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'"\s*{block_name} := {{') + pattern = re.compile('([\w/]+) [|}]') + end_marker = re.compile('}') + return self.__get_description_list(start_marker, pattern, end_marker) + + def get_help_list_macro(self, macro): + """ + Search for and parse a list of values from a help message starting with + a macro in bpftool, for example: + + " " HELP_SPEC_OPTIONS " |\\n" + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n" + + Return a set containing all item names, for example: + + {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'} + + @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example + """ + start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') + pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') + end_marker = re.compile('}\\\\n') + return self.__get_description_list(start_marker, pattern, end_marker) + + def default_options(self): + """ + Return the default options contained in HELP_SPEC_OPTIONS + """ + return { '-j', '--json', '-p', '--pretty', '-d', '--debug' 
} + + def get_bashcomp_list(self, block_name): + """ + Search for and parse a list of type names from a variable in bash + completion file, for example: + + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\ + kretprobe' + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the block to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'local {block_name}=\'') + pattern = re.compile('(?:.*=\')?([\w/]+)') + end_marker = re.compile('\'$') + return self.__get_description_list(start_marker, pattern, end_marker) + +class SourceFileExtractor(FileExtractor): + """ + An abstract extractor for a source file with usage message. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def get_options(self): + return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) + +class ProgFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's prog.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'prog.c') + + def get_prog_types(self): + return self.get_types_from_array('prog_type_name') + + def get_attach_types(self): + return self.get_types_from_array('attach_type_strings') + + def get_prog_attach_help(self): + return self.get_help_list('ATTACH_TYPE') + +class MapFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's map.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'map.c') + + def get_map_types(self): + return self.get_types_from_array('map_type_name') + + def get_map_help(self): + return self.get_help_list('TYPE') + +class CgroupFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's cgroup.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'cgroup.c') + + def get_prog_attach_help(self): + return self.get_help_list('ATTACH_TYPE') + +class CommonFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's common.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'common.c') + + def __init__(self): + super().__init__() + self.attach_types = {} + + def get_attach_types(self): + if not self.attach_types: + self.attach_types = self.get_types_from_array('attach_type_name') + return self.attach_types + + def get_cgroup_attach_types(self): + if not self.attach_types: + self.get_attach_types() + cgroup_types = {} + for (key, value) in self.attach_types.items(): + if key.find('BPF_CGROUP') != -1: + cgroup_types[key] = value + return cgroup_types + +class GenericSourceExtractor(SourceFileExtractor): + """ + An extractor for generic source code files. + """ + filename = "" + + def __init__(self, filename): + self.filename = os.path.join(BPFTOOL_DIR, filename) + super().__init__() + +class BpfHeaderExtractor(FileExtractor): + """ + An extractor for the UAPI BPF header. + """ + filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') + + def get_prog_types(self): + return self.get_enum('bpf_prog_type') + + def get_map_types(self): + return self.get_enum('bpf_map_type') + + def get_attach_types(self): + return self.get_enum('bpf_attach_type') + +class ManPageExtractor(FileExtractor): + """ + An abstract extractor for an RST documentation page. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def get_options(self): + return self.get_rst_list('OPTIONS') + +class ManProgExtractor(ManPageExtractor): + """ + An extractor for bpftool-prog.rst.
+ """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') + +class ManMapExtractor(ManPageExtractor): + """ + An extractor for bpftool-map.rst. + """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') + + def get_map_types(self): + return self.get_rst_list('TYPE') + +class ManCgroupExtractor(ManPageExtractor): + """ + An extractor for bpftool-cgroup.rst. + """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') + +class ManGenericExtractor(ManPageExtractor): + """ + An extractor for generic RST documentation pages. + """ + filename = "" + + def __init__(self, filename): + self.filename = os.path.join(BPFTOOL_DIR, filename) + super().__init__() + +class BashcompExtractor(FileExtractor): + """ + An extractor for bpftool's bash completion file. + """ + filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') + + def get_prog_attach_types(self): + return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') + + def get_map_types(self): + return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') + + def get_cgroup_attach_types(self): + return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') + +def verify(first_set, second_set, message): + """ + Print all values that differ between two sets. + @first_set: one set to compare + @second_set: another set to compare + @message: message to print for values belonging to only one of the sets + """ + global retval + diff = first_set.symmetric_difference(second_set) + if diff: + print(message, diff) + retval = 1 + +def main(): + # No arguments supported at this time, but print usage for -h|--help + argParser = argparse.ArgumentParser(description=""" + Verify that bpftool's code, help messages, documentation and bash + completion are all in sync on program types, map types, attach types, and + options. Also check that bpftool is in sync with the UAPI BPF header. 
+ """) + args = argParser.parse_args() + + # Map types (enum) + + bpf_info = BpfHeaderExtractor() + ref = bpf_info.get_map_types() + + map_info = MapFileExtractor() + source_map_items = map_info.get_map_types() + map_types_enum = set(source_map_items.keys()) + + verify(ref, map_types_enum, + f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') + + # Map types (names) + + source_map_types = set(source_map_items.values()) + source_map_types.discard('unspec') + + help_map_types = map_info.get_map_help() + help_map_options = map_info.get_options() + map_info.close() + + man_map_info = ManMapExtractor() + man_map_options = man_map_info.get_options() + man_map_types = man_map_info.get_map_types() + man_map_info.close() + + bashcomp_info = BashcompExtractor() + bashcomp_map_types = bashcomp_info.get_map_types() + + verify(source_map_types, help_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + verify(source_map_types, man_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + verify(help_map_options, man_map_options, + f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') + verify(source_map_types, bashcomp_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') + + # Program types (enum) + + ref = bpf_info.get_prog_types() + + prog_info = ProgFileExtractor() + prog_types = set(prog_info.get_prog_types().keys()) + + verify(ref, prog_types, + f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') + + # Attach types (enum) + + ref = bpf_info.get_attach_types() + bpf_info.close() + + common_info = CommonFileExtractor() + attach_types = common_info.get_attach_types() + + verify(ref, attach_types, + f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') + + # Attach types (names) + + source_prog_attach_types = set(prog_info.get_attach_types().values()) + + help_prog_attach_types = prog_info.get_prog_attach_help() + help_prog_options = prog_info.get_options() + prog_info.close() + + man_prog_info = ManProgExtractor() + man_prog_options = man_prog_info.get_options() + man_prog_attach_types = man_prog_info.get_attach_types() + man_prog_info.close() + + bashcomp_info.reset_read() # We stopped at map types, rewind + bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() + + verify(source_prog_attach_types, help_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + verify(source_prog_attach_types, man_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + verify(help_prog_options, man_prog_options, + f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') + verify(source_prog_attach_types, bashcomp_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + + # Cgroup attach types + + source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) + common_info.close() + + cgroup_info = CgroupFileExtractor() + help_cgroup_attach_types = cgroup_info.get_prog_attach_help() + help_cgroup_options 
= cgroup_info.get_options() + cgroup_info.close() + + man_cgroup_info = ManCgroupExtractor() + man_cgroup_options = man_cgroup_info.get_options() + man_cgroup_attach_types = man_cgroup_info.get_attach_types() + man_cgroup_info.close() + + bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() + bashcomp_info.close() + + verify(source_cgroup_attach_types, help_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + verify(source_cgroup_attach_types, man_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + verify(help_cgroup_options, man_cgroup_options, + f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') + verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') + + # Options for remaining commands + + for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]: + source_info = GenericSourceExtractor(cmd + '.c') + help_cmd_options = source_info.get_options() + source_info.close() + + man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) + man_cmd_options = man_cmd_info.get_options() + man_cmd_info.close() + + verify(help_cmd_options, man_cmd_options, + f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):') + + source_main_info = GenericSourceExtractor('main.c') + help_main_options = source_main_info.get_options() + source_main_info.close() + + man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) + man_main_options = man_main_info.get_options() + man_main_info.close() + + verify(help_main_options, man_main_options, + f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') + + sys.exit(retval) + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 30cbf5d98f7d..14cea869235b 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data) udp = socket(AF_INET, SOCK_DGRAM, 0); i = 0; err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); - if (!err) { - printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + if (err) { + printf("Failed socket update SOCK_DGRAM '%i:%i'\n", i, udp); goto out_sockmap; } @@ -1153,12 +1153,17 @@ out_sockmap: } #define MAPINMAP_PROG "./test_map_in_map.o" +#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o" static void test_map_in_map(void) { struct bpf_object *obj; struct bpf_map *map; int mim_fd, fd, err; int pos = 0; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 id = 0; + libbpf_print_fn_t old_print_fn; obj = bpf_object__open(MAPINMAP_PROG); @@ -1228,11 +1233,72 @@ static void test_map_in_map(void) } close(fd); + fd = -1; bpf_object__close(obj); + + /* Test that failing bpf_object__create_map() destroys the inner map */ + obj = bpf_object__open(MAPINMAP_INVALID_PROG); + err = libbpf_get_error(obj); + if (err) { + printf("Failed to load %s program: %d %d", + MAPINMAP_INVALID_PROG, err, errno); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim"); + if 
(!map) {
+		printf("Failed to load array of maps from test prog\n");
+		goto out_map_in_map;
+	}
+
+	old_print_fn = libbpf_set_print(NULL);
+
+	err = bpf_object__load(obj);
+	if (!err) {
+		printf("Loading obj supposed to fail\n");
+		goto out_map_in_map;
+	}
+
+	libbpf_set_print(old_print_fn);
+
+	/* Iterate over all maps to check whether the inner map
+	 * ("mim.inner") has been destroyed.
+	 */
+	while (true) {
+		err = bpf_map_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				break;
+			printf("Failed to get next map: %d", errno);
+			goto out_map_in_map;
+		}
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
+			printf("Failed to get map by id %u: %d", id, errno);
+			goto out_map_in_map;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			printf("Failed to get map info by fd %d: %d", fd,
+			       errno);
+			goto out_map_in_map;
+		}
+
+		if (!strcmp(info.name, "mim.inner")) {
+			printf("Inner map mim.inner was not destroyed\n");
+			goto out_map_in_map;
+		}
+	}
+
+	return;
+
 out_map_in_map:
-	close(fd);
+	if (fd >= 0)
+		close(fd);
 	exit(1);
 }
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index a7b9a69f4fd5..4990a99e7381 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -33,14 +33,14 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 
 int main(int argc, char **argv)
 {
-	struct percpu_net_cnt *percpu_netcnt;
+	union percpu_net_cnt *percpu_netcnt;
 	struct bpf_cgroup_storage_key key;
 	int map_fd, percpu_map_fd;
 	int error = EXIT_FAILURE;
-	struct net_cnt netcnt;
 	struct bpf_object *obj;
 	int prog_fd, cgroup_fd;
 	unsigned long packets;
+	union net_cnt netcnt;
 	unsigned long bytes;
 	int cpu, nproc;
 	__u32 prog_cnt;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 8ef7f334e715..c8c2bf878f67 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_STRNEQ(actual, expected, len, name) ({			\
+	static int duration = 0;					\
+	const char *___act = actual;					\
+	const char *___exp = expected;					\
+	int ___len = len;						\
+	bool ___ok = strncmp(___act, ___exp, ___len) == 0;		\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual '%.*s' != expected '%.*s'\n",	\
+	      (name), ___len, ___act, ___len, ___exp);			\
+	___ok;								\
+})
+
 #define ASSERT_OK(res, name) ({					\
 	static int duration = 0;					\
 	long long ___res = (res);					\
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c9dde9b9d987..088fcad138c9 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -69,7 +69,7 @@ cleanup() {
 }
 
 server_listen() {
-	ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+	ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
 	server_pid=$!
	sleep 0.2
 }
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 79c9eb0034d5..4f9f73e7a299 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES = socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +37,11 @@ TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
 
 TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..cfc7f4f97fd1
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..0f3e3763f4f8
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+	signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = sig_hand;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+	return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+	int flags = fcntl(fd, F_GETFL, 0);
+
+	if (set)
+		flags &= ~O_NONBLOCK;
+	else
+		flags |= O_NONBLOCK;
+	fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+	char cmd;
+
+	cmd = 'S';
+	write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+	char buf[5];
+
+	read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+	fflush(NULL);
+	unlink(sock_name);
+	kill(producer_id, SIGTERM);
+	exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+	int ans = -1;
+
+	if (ioctl(fd, SIOCATMARK, &ans) < 0) {
+#ifdef DEBUG
+		perror("SIOCATMARK Failed");
+#endif
+	}
+	return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+	*c = ' ';
+	if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+		perror("Reading MSG_OOB Failed");
+#endif
+	}
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+	int len = 0;
+
+	memset(buf, 0, size);
+	len = read(pfd, buf, size);
+#ifdef DEBUG
+	if (len < 0)
+		perror("read failed");
+#endif
+	return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+	struct pollfd pfds[1];
+
+	pfds[0].fd = pfd;
+	pfds[0].events = event;
+	poll(pfds, 1, -1);
+}
+
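+/* For context -- not part of the test flow: a minimal sketch of the
+ * MSG_OOB/SIOCATMARK mechanics the helpers above rely on. Stream sockets
+ * keep at most one byte "out of band"; SIOCATMARK reports whether the
+ * next read() would start at that mark. Names below are illustrative:
+ *
+ *	char c;
+ *	int at_mark;
+ *
+ *	send(peer, "ab", 2, MSG_OOB);		// 'b' becomes the OOB byte
+ *	ioctl(sock, SIOCATMARK, &at_mark);	// 0: regular data precedes mark
+ *	read(sock, &c, 1);			// returns 'a', stops at the mark
+ *	ioctl(sock, SIOCATMARK, &at_mark);	// 1: read position is at the mark
+ *	recv(sock, &c, 1, MSG_OOB);		// returns 'b' out of band
+ */
+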
+void producer(struct sockaddr_un *consumer_addr)
+{
+	int cfd;
+	char buf[64];
+	int i;
+
+	memset(buf, 'x', sizeof(buf));
+	cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+	wait_for_signal(pipefd[0]);
+	if (connect(cfd, (struct sockaddr *)consumer_addr,
+		    sizeof(struct sockaddr_un)) != 0) {
+		perror("Connect failed");
+		kill(0, SIGTERM);
+		exit(1);
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* Test 1: Test for SIGURG and OOB */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 2: Test for OOB being overwritten */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '#';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 3: Test for SIOCATMARK */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		send(cfd, buf, sizeof(buf), 0);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 4: Test for 1-byte OOB msg */
+		memset(buf, 'x', sizeof(buf));
+		buf[0] = '@';
+		send(cfd, buf, 1, MSG_OOB);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int lfd, pfd;
+	struct sockaddr_un consumer_addr, paddr;
+	socklen_t len = sizeof(consumer_addr);
+	char buf[1024];
+	int on = 0;
+	char oob;
+	int atmark;
+
+	lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	memset(&consumer_addr, 0, sizeof(consumer_addr));
+	consumer_addr.sun_family = AF_UNIX;
+	sprintf(sock_name, "unix_oob_%d", getpid());
+	unlink(sock_name);
+	strcpy(consumer_addr.sun_path, sock_name);
+
+	if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+		  sizeof(consumer_addr))) != 0) {
+		perror("socket bind failed");
+		exit(1);
+	}
+
+	pipe(pipefd);
+
+	listen(lfd, 1);
+
+	producer_id = fork();
+	if (producer_id == 0) {
+		producer(&consumer_addr);
+		exit(0);
+	}
+
+	set_sig_handler(SIGURG);
+	signal_producer(pipefd[1]);
+
+	pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+	fcntl(pfd, F_SETOWN, getpid());
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1:
+	 * verify that SIGURG is delivered, that 63 bytes
+	 * are read, and that the OOB byte is '@'
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	read_oob(pfd, &oob);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 63 || oob != '@') {
+		fprintf(stderr, "Test 1 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2:
+	 * Verify that the first OOB byte is overwritten by
+	 * the second one, that the first OOB byte is returned
+	 * as part of the read, and that SIGURG is received.
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = 0;
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	read_oob(pfd, &oob);
+	if (!signal_recvd || len != 127 || oob != '#') {
+		fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
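+	/* A note on the byte accounting used by the checks below
+	 * (illustrative, derived from the 64-byte sends in producer()):
+	 * each MSG_OOB send queues 64 bytes with the last byte marked as
+	 * OOB. When a second MSG_OOB send follows, the first mark is
+	 * discarded and its byte rejoins the regular stream, so a read
+	 * sees 64 + 63 = 127 in-band bytes up to the new mark, while the
+	 * single marked byte is fetched separately with MSG_OOB.
+	 */
+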
+	/* Test 3:
+	 * verify that the second OOB overwrites the first one,
+	 * that read breaks at the OOB boundary returning 127
+	 * bytes, that SIGURG is received, and that atmark is
+	 * set. The OOB byte is '%' and the second read returns
+	 * 64 bytes.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 150)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+		fprintf(stderr,
+			"Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+			signal_recvd, len, oob, atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+
+	len = read_data(pfd, buf, 1024);
+	if (len != 64) {
+		fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4:
+	 * verify that a single-byte OOB message is delivered:
+	 * set non-blocking mode, check that reading the in-band
+	 * data fails with EAGAIN (nothing but the OOB byte is
+	 * queued), and that SIGURG is received and the correct
+	 * OOB byte is read.
+	 */
+
+	set_filemode(pfd, 0);
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if ((len == -1) && (errno == EAGAIN))
+		len = 0;
+
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 0 || oob != '@') {
+		fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	set_filemode(pfd, 1);
+
+	/* Inline Testing */
+
+	on = 1;
+	if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+		perror("SO_OOBINLINE");
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1 -- Inline:
+	 * Check that SIGURG is delivered, that 63 bytes are
+	 * read, and that the OOB byte is '@'
+	 */
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+
+	if (!signal_recvd || len != 63) {
+		fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+			signal_recvd, len);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+
+	if (len != 1) {
+		fprintf(stderr,
+			"Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2 -- Inline:
+	 * Verify that the first OOB byte is overwritten by the
+	 * second one, that read breaks correctly on the second
+	 * OOB boundary with the first OOB byte returned as part
+	 * of the read, that SIGURG is delivered, and that
+	 * SIOCATMARK returns true. The next read returns one
+	 * byte, the OOB byte, and SIOCATMARK returns false.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 127 || atmark != 1 || !signal_recvd) {
+		fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+			len, atmark);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 1 || buf[0] != '#' || atmark == 1) {
+		fprintf(stderr,
+			"Test 2.1 Inline failed, len %d data %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
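+	/* Reminder (illustrative): with SO_OOBINLINE set, the marked byte
+	 * stays in the normal data stream instead of being fetched with
+	 * MSG_OOB, so the remaining checks count it in read_data() and
+	 * locate the boundary with SIOCATMARK, along the lines of:
+	 *
+	 *	read(sock, buf, n);		// stops at the mark
+	 *	ioctl(sock, SIOCATMARK, &m);	// m == 1: at the mark
+	 *	read(sock, buf, 1);		// OOB byte, delivered in-band
+	 */
+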
+	/* Test 3 -- Inline:
+	 * verify that the second OOB overwrites the first one,
+	 * that read breaks at the OOB boundary returning 127
+	 * bytes, and that SIGURG is received and SIOCATMARK
+	 * is true after the read.
+	 * The subsequent read returns 65 bytes because of the
+	 * OOB byte, which should be '%'.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 126)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (!signal_recvd || len != 127 || !atmark) {
+		fprintf(stderr,
+			"Test 3 Inline failed, sigurg %d len %d data %c\n",
+			signal_recvd, len, buf[0]);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 65 || buf[0] != '%' || atmark != 0) {
+		fprintf(stderr,
+			"Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4 -- Inline:
+	 * verify that a single-byte OOB message is delivered:
+	 * read returns one byte, the OOB byte, and SIGURG is
+	 * received.
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 1 || buf[0] != '@') {
+		fprintf(stderr,
+			"Test 4 Inline failed, signal %d len %d data %c\n",
+			signal_recvd, len, buf[0]);
+		die(1);
+	}
+	die(0);
+}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6f905b53904f..21b646d10b88 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception is the last data
+ *  packet coalesced: it can be smaller than the rest and is still
+ *  coalesced as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACKs do not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG, or RST set
+ *  will be coalesced.
+ * 4.tcp
+ *  Packets with an incorrect checksum, a non-consecutive seqno, or
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some options (such as timestamp) include padding, headers that
+ *  are padded differently would not be coalesced.
+ * 5.ip:
+ *  Packets with different (ECN, TTL, TOS) header fields, IP options,
+ *  or IP fragments (IPv6) shouldn't coalesce.
+ * 6.large:
+ *  Packets larger than GRO_MAX_SIZE shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because,
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
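+ *
+ * Worked example of the sizing above (illustrative, assuming the usual
+ * 20-byte TCP and 40-byte IPv6 headers used by this file's macros):
+ *   MSS           = 4096 - 20 - 40            = 4036 bytes
+ *   MAX_PAYLOAD   = 65535 (IP_MAXPACKET) - 60 = 65475 bytes
+ *   NUM_LARGE_PKT = 65475 / 4036              = 16 full-MSS segments
+ * so the "large" test sends 16 MSS-sized packets plus a remainder; the
+ * coalesced segment is capped at MAX_PAYLOAD and the rest arrives
+ * separately.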
+ * + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#define DPORT 8000 +#define SPORT 1500 +#define PAYLOAD_LEN 100 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define NUM_PACKETS 4 +#define START_SEQ 100 +#define START_ACK 100 +#define SIP6 "fdaa::2" +#define DIP6 "fdaa::1" +#define SIP4 "192.168.1.200" +#define DIP4 "192.168.1.100" +#define ETH_P_NONE 0 +#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) + +static int proto = -1; +static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; +static char *testname = "data"; +static char *ifname = "eth0"; +static char *smac = "aa:00:00:00:00:02"; +static char *dmac = "aa:00:00:00:00:01"; +static bool verbose; +static bool tx_socket = true; +static int tcp_offset = -1; +static int total_hdr_len = -1; +static int ethhdr_proto = -1; + +static void vlog(const char *fmt, ...) +{ + va_list args; + + if (verbose) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +} + +static void setup_sock_filter(int fd) +{ + const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); + const int ethproto_off = offsetof(struct ethhdr, h_proto); + int optlen = 0; + int ipproto_off; + int next_off; + + if (proto == PF_INET) + next_off = offsetof(struct iphdr, protocol); + else + next_off = offsetof(struct ipv6hdr, nexthdr); + ipproto_off = ETH_HLEN + next_off; + + if (strcmp(testname, "ip") == 0) { + if (proto == PF_INET) + optlen = sizeof(struct ip_timestamp); + else + optlen = sizeof(struct ip6_frag); + } + + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), + BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), + BPF_STMT(BPF_RET + BPF_K, 0), + }; + + struct sock_fprog bpf = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) + error(1, errno, "error setting filter"); +} + +static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static uint16_t tcp_checksum(void *buf, int payload_len) +{ + struct pseudo_header6 { + struct in6_addr saddr; + struct in6_addr daddr; 
+ uint16_t protocol; + uint16_t payload_len; + } ph6; + struct pseudo_header4 { + struct in_addr saddr; + struct in_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph4; + uint32_t sum = 0; + + if (proto == PF_INET6) { + if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1) + error(1, errno, "inet_pton6 source ip pseudo"); + if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1) + error(1, errno, "inet_pton6 dest ip pseudo"); + ph6.protocol = htons(IPPROTO_TCP); + ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph6, sizeof(ph6), 0); + } else if (proto == PF_INET) { + if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1) + error(1, errno, "inet_pton source ip pseudo"); + if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1) + error(1, errno, "inet_pton dest ip pseudo"); + ph4.protocol = htons(IPPROTO_TCP); + ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph4, sizeof(ph4), 0); + } + + return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); +} + +static void read_MAC(uint8_t *mac_addr, char *mac) +{ + if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) + error(1, 0, "sscanf"); +} + +static void fill_datalinklayer(void *buf) +{ + struct ethhdr *eth = buf; + + memcpy(eth->h_dest, dst_mac, ETH_ALEN); + memcpy(eth->h_source, src_mac, ETH_ALEN); + eth->h_proto = ethhdr_proto; +} + +static void fill_networklayer(void *buf, int payload_len) +{ + struct ipv6hdr *ip6h = buf; + struct iphdr *iph = buf; + + if (proto == PF_INET6) { + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = 8; + if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1) + error(1, errno, "inet_pton source ip6"); + if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1) + error(1, errno, "inet_pton dest ip6"); + } else if (proto == PF_INET) { + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = IPPROTO_TCP; + iph->tot_len = htons(sizeof(struct tcphdr) + + payload_len + sizeof(struct iphdr)); + iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ + if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1) + error(1, errno, "inet_pton source ip"); + if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1) + error(1, errno, "inet_pton dest ip"); + iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); + } +} + +static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + struct tcphdr *tcph = buf; + + memset(tcph, 0, sizeof(*tcph)); + + tcph->source = htons(SPORT); + tcph->dest = htons(DPORT); + tcph->seq = ntohl(START_SEQ + seq_offset); + tcph->ack_seq = ntohl(START_ACK + ack_offset); + tcph->ack = 1; + tcph->fin = fin; + tcph->doff = 5; + tcph->window = htons(TCP_MAXWIN); + tcph->urg_ptr = 0; + tcph->check = tcp_checksum(tcph, payload_len); +} + +static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) +{ + int ret = -1; + + ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + if (ret == -1) + error(1, errno, "sendto failure"); + if (ret != len) + error(1, errno, "sendto wrong length"); +} + +static void create_packet(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + memset(buf, 0, total_hdr_len); + memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, 
ack_offset, + payload_len, fin); + fill_networklayer(buf + ETH_HLEN, payload_len); + fill_datalinklayer(buf); +} + +/* send one extra flag, not first and not last pkt */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg) +{ + static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, flag, i; + struct tcphdr *tcph; + + payload_len = PAYLOAD_LEN * psh; + pkt_size = total_hdr_len + payload_len; + flag = NUM_PACKETS / 2; + + create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + + tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph->psh = psh; + tcph->syn = syn; + tcph->rst = rst; + tcph->urg = urg; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, payload_len); + + for (i = 0; i < NUM_PACKETS + 1; i++) { + if (i == flag) { + write_packet(fd, flag_buf, pkt_size, daddr); + continue; + } + create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +/* Test for data of same length, smaller than previous + * and of different lengths + */ +static void send_data_pkts(int fd, struct sockaddr_ll *daddr, + int payload_len1, int payload_len2) +{ + static char buf[ETH_HLEN + IP_MAXPACKET]; + + create_packet(buf, 0, 0, payload_len1, 0); + write_packet(fd, buf, total_hdr_len + payload_len1, daddr); + create_packet(buf, payload_len1, 0, payload_len2, 0); + write_packet(fd, buf, total_hdr_len + payload_len2, daddr); +} + +/* If incoming segments make tracked segment length exceed + * legal IP datagram length, do not coalesce + */ +static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) +{ + static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; + static char last[TOTAL_HDR_LEN + MSS]; + static char new_seg[TOTAL_HDR_LEN + MSS]; + int i; + + for (i = 0; i < NUM_LARGE_PKT; i++) + create_packet(pkts[i], i * MSS, 0, MSS, 0); + create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); + create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + + for (i = 0; i < NUM_LARGE_PKT; i++) + write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + write_packet(fd, last, total_hdr_len + remainder, daddr); + write_packet(fd, new_seg, total_hdr_len + remainder, daddr); +} + +/* Pure acks and dup acks don't coalesce */ +static void send_ack(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN]; + + create_packet(buf, 0, 0, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); + write_packet(fd, buf, total_hdr_len, daddr); + create_packet(buf, 0, 1, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); +} + +static void recompute_packet(char *buf, char *no_ext, int extlen) +{ + struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + memmove(buf, no_ext, total_hdr_len); + memmove(buf + total_hdr_len + extlen, + no_ext + total_hdr_len, PAYLOAD_LEN); + + tcphdr->doff = tcphdr->doff + (extlen / 4); + tcphdr->check = 0; + tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); + if (proto == PF_INET) { + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + } +} + +static void tcp_write_options(char *buf, int kind, int ts) +{ + struct tcp_option_ts { + uint8_t kind; + uint8_t len; + uint32_t tsval; + uint32_t tsecr; 
+ } *opt_ts = (void *)buf; + struct tcp_option_window { + uint8_t kind; + uint8_t len; + uint8_t shift; + } *opt_window = (void *)buf; + + switch (kind) { + case TCPOPT_NOP: + buf[0] = TCPOPT_NOP; + break; + case TCPOPT_WINDOW: + memset(opt_window, 0, sizeof(struct tcp_option_window)); + opt_window->kind = TCPOPT_WINDOW; + opt_window->len = TCPOLEN_WINDOW; + opt_window->shift = 0; + break; + case TCPOPT_TIMESTAMP: + memset(opt_ts, 0, sizeof(struct tcp_option_ts)); + opt_ts->kind = TCPOPT_TIMESTAMP; + opt_ts->len = TCPOLEN_TIMESTAMP; + opt_ts->tsval = ts; + opt_ts->tsecr = 0; + break; + default: + error(1, 0, "unimplemented TCP option"); + break; + } +} + +/* TCP with options is always a permutation of {TS, NOP, NOP}. + * Implement different orders to verify coalescing stops. + */ +static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) +{ + switch (order) { + case 0: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, + TCPOPT_TIMESTAMP, ts); + break; + case 1: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, + TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, + TCPOPT_NOP, 0); + break; + case 2: + tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, + TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, + TCPOPT_NOP, 0); + break; + default: + error(1, 0, "unknown order"); + break; + } + recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); +} + +/* Packets with invalid checksum don't coalesce. */ +static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->check = tcph->check - 1; + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packets with non-consecutive sequence number don't coalesce.*/ +static void send_changed_seq(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->seq = ntohl(htonl(tcph->seq) + 1); + tcph->check = 0; + tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packet with different timestamp option or different timestamps + * don't coalesce. 
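+ * (For reference, assuming the <netinet/tcp.h> option constants: the
+ * options block built by add_standard_tcp_options() is always
+ * TCPOLEN_TSTAMP_APPA == 12 bytes, a 10-byte timestamp option padded
+ * to a 4-byte boundary with two NOPs, per RFC 7323 Appendix A, e.g.:
+ *
+ *	[NOP][NOP][kind=8][len=10][tsval:4][tsecr:4]
+ *
+ * Either a different tsval or a different NOP/timestamp permutation
+ * must stop coalescing.)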
+ */ +static void send_changed_ts(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 1); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 2); + write_packet(fd, extpkt, pkt_size, daddr); +} + +/* Packet with different tcp options don't coalesce. */ +static void send_diff_opt(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; + int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); + recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); + write_packet(fd, extpkt2, extpkt2_size, daddr); +} + +static void add_ipv4_ts_option(void *buf, void *optpkt) +{ + struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); + int optlen = sizeof(struct ip_timestamp); + struct iphdr *iph; + + if (optlen % 4) + error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); + + ts->ipt_code = IPOPT_TS; + ts->ipt_len = optlen; + ts->ipt_ptr = 5; + ts->ipt_flg = IPOPT_TS_TSONLY; + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph = (struct iphdr *)(optpkt + ETH_HLEN); + iph->ihl = 5 + (optlen / 4); + iph->tot_len = htons(ntohs(iph->tot_len) + optlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); +} + +/* IPv4 options shouldn't coalesce */ +static void send_ip_options(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; + int optlen = sizeof(struct ip_timestamp); + int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv4_ts_option(buf, optpkt); + write_packet(fd, optpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); +} + +/* IPv4 fragments shouldn't coalesce */ +static void send_fragment4(int fd, struct sockaddr_ll *daddr) +{ + 
static char buf[IP_MAXPACKET]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + /* Once fragmented, packet would retain the total_len. + * Tcp header is prepared as if rest of data is in follow-up frags, + * but follow up frags aren't actually sent. + */ + memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); + fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_datalinklayer(buf); + + iph->frag_off = htons(0x6000); // DF = 1, MF = 1 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv4 packets with different ttl don't coalesce.*/ +static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->ttl = 7; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different tos don't coalesce.*/ +static void send_changed_tos(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + iph->tos = 1; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else if (proto == PF_INET6) { + ip6h->priority = 0xf; + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different ECN don't coalesce.*/ +static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv6 fragments and packets with extensions don't coalesce.*/ +static void send_fragment6(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + + sizeof(struct ip6_frag)]; + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct ip6_frag *frag = (void *)(extpkt + tcp_offset); + int extlen = sizeof(struct ip6_frag); + int bufpkt_len = total_hdr_len + PAYLOAD_LEN; + int extpkt_len = bufpkt_len + extlen; + int i; + + for (i = 0; i < 2; i++) { + create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); + } + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + memset(extpkt, 0, extpkt_len); + + ip6h->nexthdr = IPPROTO_FRAGMENT; + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + 
frag->ip6f_nxt = IPPROTO_TCP; + + memcpy(extpkt, buf, tcp_offset); + memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + write_packet(fd, extpkt, extpkt_len, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); +} + +static void bind_packetsocket(int fd) +{ + struct sockaddr_ll daddr = {}; + + daddr.sll_family = AF_PACKET; + daddr.sll_protocol = ethhdr_proto; + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) + error(1, errno, "could not bind socket"); +} + +static void set_timeout(int fd) +{ + struct timeval timeout; + + timeout.tv_sec = 120; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)) < 0) + error(1, errno, "cannot set timeout, setsockopt failed"); +} + +static void check_recv_pkts(int fd, int *correct_payload, + int correct_num_pkts) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + struct tcphdr *tcph; + bool bad_packet = false; + int tcp_ext_len = 0; + int ip_ext_len = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int i; + + vlog("Expected {"); + for (i = 0; i < correct_num_pkts; i++) + vlog("%d ", correct_payload[i]); + vlog("}, Total %d packets\nReceived {", correct_num_pkts); + + while (1) { + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + error(1, errno, "could not receive"); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = sizeof(struct ip6_frag); + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + tcp_ext_len = (tcph->doff - 5) * 4; + data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; + /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. + * Ipv4/tcp packets without at least 6 bytes of data will be padded. + * Packet sockets are protocol agnostic, and will not trim the padding. 
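+	 * Worked example (illustrative, using the standard 14-byte
+	 * Ethernet, 20-byte IPv4 and 20-byte TCP headers): a segment with
+	 * 2 bytes of payload occupies 14 + 20 + 20 + 2 = 56 bytes on the
+	 * wire, so the frame is padded to ETH_ZLEN (60) and recv()
+	 * reports 60 bytes. The payload length must therefore be
+	 * recovered from iph->tot_len, as done just below.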
+ */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } + vlog("%d ", data_len); + if (data_len != correct_payload[num_pkt]) { + vlog("[!=%d]", correct_payload[num_pkt]); + bad_packet = true; + } + num_pkt++; + } + vlog("}, Total %d packets.\n", num_pkt); + if (num_pkt != correct_num_pkts) + error(1, 0, "incorrect number of packets"); + if (bad_packet) + error(1, 0, "incorrect packet geometry"); + + printf("Test succeeded\n\n"); +} + +static void gro_sender(void) +{ + static char fin_pkt[MAX_HDR_LEN]; + struct sockaddr_ll daddr = {}; + int txfd = -1; + + txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (txfd < 0) + error(1, errno, "socket creation"); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + daddr.sll_family = AF_PACKET; + memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); + daddr.sll_halen = ETH_ALEN; + create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); + + if (strcmp(testname, "data") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ack") == 0) { + send_ack(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 0, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp") == 0) { + send_changed_checksum(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_seq(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_ts(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_diff_opt(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip") == 0) { + send_changed_ECN(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_tos(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + if (proto == PF_INET) { + /* Modified packets may be received out of order. + * Sleep function added to enforce test boundaries + * so that fin pkts are not received prior to other pkts. + */ + sleep(1); + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_ip_options(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_fragment4(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (proto == PF_INET6) { + send_fragment6(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } + } else if (strcmp(testname, "large") == 0) { + /* 20 is the difference between min iphdr size + * and min ipv6hdr size. Like MAX_HDR_SIZE, + * MAX_PAYLOAD is defined with the larger header of the two. + */ + int offset = proto == PF_INET ? 
20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + send_large(txfd, &daddr, remainder); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_large(txfd, &daddr, remainder + 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { + error(1, 0, "Unknown testcase"); + } + + if (close(txfd)) + error(1, errno, "socket close"); +} + +static void gro_receiver(void) +{ + static int correct_payload[NUM_PACKETS]; + int rxfd = -1; + + rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); + if (rxfd < 0) + error(1, 0, "socket creation"); + setup_sock_filter(rxfd); + set_timeout(rxfd); + bind_packetsocket(rxfd); + + memset(correct_payload, 0, sizeof(correct_payload)); + + if (strcmp(testname, "data") == 0) { + printf("pure data packet of same size: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("large data packets followed by a smaller one: "); + correct_payload[0] = PAYLOAD_LEN * 1.5; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("small data packets followed by a larger one: "); + correct_payload[0] = PAYLOAD_LEN / 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ack") == 0) { + printf("duplicate ack and pure ack: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags") == 0) { + correct_payload[0] = PAYLOAD_LEN * 3; + correct_payload[1] = PAYLOAD_LEN * 2; + + printf("psh flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 2); + + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("syn flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("rst flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("urg flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "tcp") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + + printf("changed checksum does not coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Wrong Seq number doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Different timestamp doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 4); + + printf("Different options doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + + printf("different ECN doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("different tos doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + if (proto == PF_INET) { + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("ip options doesn't coalesce: "); + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (proto == PF_INET6) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. 
+ */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } + } else if (strcmp(testname, "large") == 0) { + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[1] = remainder; + printf("Shouldn't coalesce if exceed IP max pkt size: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* last segment sent individually, doesn't start new segment */ + correct_payload[0] = correct_payload[0] - remainder; + correct_payload[1] = remainder + 1; + correct_payload[2] = remainder + 1; + check_recv_pkts(rxfd, correct_payload, 3); + } else { + error(1, 0, "Test case error, should never trigger"); + } + + if (close(rxfd)) + error(1, 0, "socket close"); +} + +static void parse_args(int argc, char **argv) +{ + static const struct option opts[] = { + { "dmac", required_argument, NULL, 'D' }, + { "iface", required_argument, NULL, 'i' }, + { "ipv4", no_argument, NULL, '4' }, + { "ipv6", no_argument, NULL, '6' }, + { "rx", no_argument, NULL, 'r' }, + { "smac", required_argument, NULL, 'S' }, + { "test", required_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + int c; + + while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) { + switch (c) { + case '4': + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case '6': + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; + case 'D': + dmac = optarg; + break; + case 'i': + ifname = optarg; + break; + case 'r': + tx_socket = false; + break; + case 'S': + smac = optarg; + break; + case 't': + testname = optarg; + break; + case 'v': + verbose = true; + break; + default: + error(1, 0, "%s invalid option %c\n", __func__, c); + break; + } + } +} + +int main(int argc, char **argv) +{ + parse_args(argc, argv); + + if (proto == PF_INET) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); + total_hdr_len = MAX_HDR_LEN; + } else { + error(1, 0, "Protocol family is not ipv4 or ipv6"); + } + + read_MAC(src_mac, smac); + read_MAC(dst_mac, dmac); + + if (tx_socket) + gro_sender(); + else + gro_receiver(); + return 0; +} diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh new file mode 100755 index 000000000000..794d2bf36dd7 --- /dev/null +++ b/tools/testing/selftests/net/gro.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source setup_loopback.sh +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" +readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") +readonly PROTOS=("ipv4" "ipv6") +dev="eth0" +test="all" +proto="ipv4" + +setup_interrupt() { + # Use timer on host to trigger the network stack + # Also disable device interrupt to not depend on NIC interrupt + # Reduce test flakiness caused by unexpected interrupts + echo 100000 >"${FLUSH_PATH}" + echo 50 >"${IRQ_PATH}" +} + +setup_ns() { + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" +} + +cleanup_ns() { + cleanup_macvlan_ns server_ns server client_ns client +} + +setup() { + setup_loopback_environment "${dev}" + setup_interrupt +} + +cleanup() { + cleanup_loopback "${dev}" + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" 
+ echo "${HARD_IRQS}" >"${IRQ_PATH}" +} + +run_test() { + local server_pid=0 + local exit_code=0 + local protocol=$1 + local test=$2 + local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ + "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) + + setup_ns + # Each test is run 3 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + for tries in {1..3}; do + # Actual test starts here + ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + 1>>log.txt & + server_pid=$! + sleep 0.5 # to allow for socket init + ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + 1>>log.txt + wait "${server_pid}" + exit_code=$? + if [[ "${exit_code}" -eq 0 ]]; then + break; + fi + done + cleanup_ns + echo ${exit_code} +} + +run_all_tests() { + local failed_tests=() + for proto in "${PROTOS[@]}"; do + for test in "${TESTS[@]}"; do + echo "running test ${proto} ${test}" >&2 + exit_code=$(run_test $proto $test) + if [[ "${exit_code}" -ne 0 ]]; then + failed_tests+=("${proto}_${test}") + fi; + done; + done + if [[ ${#failed_tests[@]} -ne 0 ]]; then + echo "failed tests: ${failed_tests[*]}. \ + Please see log.txt for more logs" + exit 1 + else + echo "All Tests Succeeded!" + fi; +} + +usage() { + echo "Usage: $0 \ + [-i <DEV>] \ + [-t data|ack|flags|tcp|ip|large] \ + [-p <ipv4|ipv6>]" 1>&2; + exit 1; +} + +while getopts "i:t:p:" opt; do + case "${opt}" in + i) + dev="${OPTARG}" + ;; + t) + test="${OPTARG}" + ;; + p) + proto="${OPTARG}" + ;; + *) + usage + ;; + esac +done + +readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" +readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" +readonly HARD_IRQS="$(< ${IRQ_PATH})" +setup +trap cleanup EXIT +if [[ "${test}" == "all" ]]; then + run_all_tests +else + run_test "${proto}" "${test}" +fi; diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh new file mode 100644 index 000000000000..bcf15487e584 --- /dev/null +++ b/tools/testing/selftests/net/ioam6.sh @@ -0,0 +1,297 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# This test evaluates the IOAM insertion for IPv6 by checking the IOAM data +# integrity on the receiver. +# +# The topology is formed by 3 nodes: Alpha (sender), Beta (router in-between) +# and Gamma (receiver). An IOAM domain is configured from Alpha to Gamma only, +# which means not on the reverse path. When Gamma is the destination, Alpha +# adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop and fills the +# trace with its own IOAM data. Beta and Gamma also fill the trace. The IOAM +# data integrity is checked on Gamma, by comparing with the pre-defined IOAM +# configuration (see below). +# +# +-------------------+ +-------------------+ +# | | | | +# | alpha netns | | gamma netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | db01::2/64 | | | | db02::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +----------------------------------------------------+ +# | . . | +# | +-------------+ +-------------+ | +# | | veth0 | | veth1 | | +# | | db01::1/64 | ................ 
| db02::1/64 | | +# | +-------------+ +-------------+ | +# | | +# | beta netns | +# | | +# +--------------------------+-------------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~ +# | IOAM configuration | +# ~~~~~~~~~~~~~~~~~~~~~~ +# +# Alpha +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 1 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 11111111 | +# +-----------------------------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Egress ID | 101 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 101101 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee0 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 777 | +# +-----------------------------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +-----------------------------------------------------------+ +# +# Note: When Gamma is the destination, Alpha adds an IOAM Pre-allocated Trace +# option inside a Hop-by-hop, where 164 bytes are pre-allocated for the +# trace, with 123 as the IOAM-Namespace and with 0xfff00200 as the trace +# type (= all available options at this time). As a result, and based on +# IOAM configurations here, only both Alpha and Beta should be capable of +# inserting their IOAM data while Gamma won't have enough space and will +# set the overflow bit. 
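+#       Space accounting for those 164 bytes (illustrative; field sizes
+#       as defined by the IOAM Pre-allocated Trace format): trace type
+#       0xfff00200 selects nine 4-byte fields and three 8-byte "wide"
+#       fields, i.e. 60 bytes of node data per hop, plus the variable
+#       opaque state snippet. Alpha carries a schema (4-byte snippet
+#       header + 36 bytes of 4n-aligned schema data): 60 + 40 = 100
+#       bytes. Beta has no schema: 60 + 4 = 64 bytes. 100 + 64 = 164
+#       bytes fill the pre-allocated trace exactly, leaving no room for
+#       Gamma's node data, hence the overflow bit.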
+# +# Beta +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 2 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 22222222 | +# +-----------------------------------------------------------+ +# | Ingress ID | 201 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 201201 | +# +-----------------------------------------------------------+ +# | Egress ID | 202 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 202202 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee1 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf11dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +-----------------------------------------------------------+ +# | Schema Data | | +# +-----------------------------------------------------------+ +# +# Gamma +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 3 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 33333333 | +# +-----------------------------------------------------------+ +# | Ingress ID | 301 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 301301 | +# +-----------------------------------------------------------+ +# | Egress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee2 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf22dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +-----------------------------------------------------------+ +# | Schema Data | | +# +-----------------------------------------------------------+ + +#=============================================================================== +# +# WARNING: +# Do NOT modify the following configuration unless you know what you're doing. +# +IOAM_NAMESPACE=123 +IOAM_TRACE_TYPE=0xfff00200 +IOAM_PREALLOC_DATA_SIZE=164 + +ALPHA=( + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID + 0xffffffff # Ingress Wide ID + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbee0 # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID (0xffffff = None) + "something that will be 4n-aligned" # Schema Data +) + +BETA=( + 2 + 22222222 + 201 + 201201 + 202 + 202202 + 0xdeadbee1 + 0xcafec0caf11dc0de + 0xffffff + "" +) + +GAMMA=( + 3 + 33333333 + 301 + 301301 + 0xffff + 0xffffffff + 0xdeadbee2 + 0xcafec0caf22dc0de + 0xffffff + "" +) +#=============================================================================== + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit 1 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 1 +fi + +ip ioam &>/dev/null +if [ $? = 1 ]; then + echo "SKIP: ip tool must include IOAM" + exit 1 +fi + +if [ ! 
-e /proc/sys/net/ipv6/ioam6_id ]; then + echo "SKIP: ioam6 sysctls do not exist" + exit 1 +fi + +cleanup() +{ + ip link del ioam-veth-alpha 2>/dev/null || true + ip link del ioam-veth-gamma 2>/dev/null || true + + ip netns del ioam-node-alpha || true + ip netns del ioam-node-beta || true + ip netns del ioam-node-gamma || true +} + +setup() +{ + ip netns add ioam-node-alpha + ip netns add ioam-node-beta + ip netns add ioam-node-gamma + + ip link add name ioam-veth-alpha type veth peer name ioam-veth-betaL + ip link add name ioam-veth-betaR type veth peer name ioam-veth-gamma + + ip link set ioam-veth-alpha netns ioam-node-alpha + ip link set ioam-veth-betaL netns ioam-node-beta + ip link set ioam-veth-betaR netns ioam-node-beta + ip link set ioam-veth-gamma netns ioam-node-gamma + + ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 + ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 + ip -netns ioam-node-beta link set ioam-veth-betaR name veth1 + ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0 + + ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 + ip -netns ioam-node-alpha link set veth0 up + ip -netns ioam-node-alpha link set lo up + ip -netns ioam-node-alpha route add default via db01::1 + + ip -netns ioam-node-beta addr add db01::1/64 dev veth0 + ip -netns ioam-node-beta addr add db02::1/64 dev veth1 + ip -netns ioam-node-beta link set veth0 up + ip -netns ioam-node-beta link set veth1 up + ip -netns ioam-node-beta link set lo up + + ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 + ip -netns ioam-node-gamma link set veth0 up + ip -netns ioam-node-gamma link set lo up + ip -netns ioam-node-gamma route add default via db02::1 + + # - IOAM config - + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns ioam-node-alpha ioam namespace add ${IOAM_NAMESPACE} data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns ioam-node-alpha ioam namespace set ${IOAM_NAMESPACE} schema ${ALPHA[8]} + ip -netns ioam-node-alpha route add db02::/64 encap ioam6 trace type ${IOAM_TRACE_TYPE:0:-2} ns ${IOAM_NAMESPACE} size ${IOAM_PREALLOC_DATA_SIZE} via db01::1 dev veth0 + + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns ioam-node-beta ioam namespace add ${IOAM_NAMESPACE} data ${BETA[6]} wide ${BETA[7]} + + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec 
ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns ioam-node-gamma ioam namespace add ${IOAM_NAMESPACE} data ${GAMMA[6]} wide ${GAMMA[7]} +} + +run() +{ + echo -n "IOAM test... " + + ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + if [ $? != 0 ]; then + echo "FAILED" + cleanup &>/dev/null + exit 0 + fi + + ip netns exec ioam-node-gamma ./ioam6_parser veth0 2 ${IOAM_NAMESPACE} ${IOAM_TRACE_TYPE} 64 ${ALPHA[0]} ${ALPHA[1]} ${ALPHA[2]} ${ALPHA[3]} ${ALPHA[4]} ${ALPHA[5]} ${ALPHA[6]} ${ALPHA[7]} ${ALPHA[8]} "${ALPHA[9]}" 63 ${BETA[0]} ${BETA[1]} ${BETA[2]} ${BETA[3]} ${BETA[4]} ${BETA[5]} ${BETA[6]} ${BETA[7]} ${BETA[8]} & + + local spid=$! + sleep 0.1 + + ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + + wait $spid + [ $? = 0 ] && echo "PASSED" || echo "FAILED" +} + +cleanup &>/dev/null +setup +run +cleanup &>/dev/null diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c new file mode 100644 index 000000000000..2256cf5ad637 --- /dev/null +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Author: Justin Iurman (justin.iurman@uliege.be) + * + * IOAM parser for IPv6, see ioam6.sh for details. + */ +#include <asm/byteorder.h> +#include <linux/const.h> +#include <linux/if_ether.h> +#include <linux/ioam6.h> +#include <linux/ipv6.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +struct node_args { + __u32 id; + __u64 wide; + __u16 ingr_id; + __u16 egr_id; + __u32 ingr_wide; + __u32 egr_wide; + __u32 ns_data; + __u64 ns_wide; + __u32 sc_id; + __u8 hop_limit; + __u8 *sc_data; /* NULL when sc_id = 0xffffff (default empty value) */ +}; + +/* expected args per node, in that order */ +enum { + NODE_ARG_HOP_LIMIT, + NODE_ARG_ID, + NODE_ARG_WIDE, + NODE_ARG_INGR_ID, + NODE_ARG_INGR_WIDE, + NODE_ARG_EGR_ID, + NODE_ARG_EGR_WIDE, + NODE_ARG_NS_DATA, + NODE_ARG_NS_WIDE, + NODE_ARG_SC_ID, + __NODE_ARG_MAX, +}; + +#define NODE_ARGS_SIZE __NODE_ARG_MAX + +struct args { + __u16 ns_id; + __u32 trace_type; + __u8 n_node; + __u8 *ifname; + struct node_args node[0]; +}; + +/* expected args, in that order */ +enum { + ARG_IFNAME, + ARG_N_NODE, + ARG_NS_ID, + ARG_TRACE_TYPE, + __ARG_MAX, +}; + +#define ARGS_SIZE __ARG_MAX + +int check_ioam6_node_data(__u8 **p, struct ioam6_trace_hdr *trace, __u8 hlim, + __u32 id, __u64 wide, __u16 ingr_id, __u32 ingr_wide, + __u16 egr_id, __u32 egr_wide, __u32 ns_data, + __u64 ns_wide, __u32 sc_id, __u8 *sc_data) +{ + __u64 raw64; + __u32 raw32; + __u8 sc_len; + + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (hlim != (raw32 >> 24) || id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (ingr_id != (raw32 >> 16) || egr_id != (raw32 & 0xffff)) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit2) + *p += sizeof(__u32); + + if (trace->type.bit3) + *p += sizeof(__u32); + + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)*p)) != ns_data) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if 
(trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)*p)); + if (hlim != (raw64 >> 56) || wide != (raw64 & 0xffffffffffffff)) + return 1; + *p += sizeof(__u64); + } + + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)*p)) != ingr_wide) + return 1; + *p += sizeof(__u32); + + if (__be32_to_cpu(*((__u32 *)*p)) != egr_wide) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)*p)) != ns_wide) + return 1; + *p += sizeof(__u64); + } + + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit22) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + sc_len = sc_data ? __ALIGN_KERNEL(strlen(sc_data), 4) : 0; + if (sc_len != (raw32 >> 24) * 4 || sc_id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + + if (sc_data) { + if (strncmp(*p, sc_data, strlen(sc_data))) + return 1; + + *p += strlen(sc_data); + sc_len -= strlen(sc_data); + + while (sc_len--) { + if (**p != '\0') + return 1; + *p += sizeof(__u8); + } + } + } + + return 0; +} + +int check_ioam6_trace(struct ioam6_trace_hdr *trace, struct args *args) +{ + __u8 *p; + int i; + + if (__be16_to_cpu(trace->namespace_id) != args->ns_id || + __be32_to_cpu(trace->type_be32) != args->trace_type) + return 1; + + p = trace->data + trace->remlen * 4; + + for (i = args->n_node - 1; i >= 0; i--) { + if (check_ioam6_node_data(&p, trace, + args->node[i].hop_limit, + args->node[i].id, + args->node[i].wide, + args->node[i].ingr_id, + args->node[i].ingr_wide, + args->node[i].egr_id, + args->node[i].egr_wide, + args->node[i].ns_data, + args->node[i].ns_wide, + args->node[i].sc_id, + args->node[i].sc_data)) + return 1; + } + + return 0; +} + +int parse_node_args(int *argcp, char ***argvp, struct node_args *node) +{ + char **argv = *argvp; + + if (*argcp < NODE_ARGS_SIZE) + return 1; + + node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 10); + if (!node->hop_limit) { + node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 16); + if (!node->hop_limit) + return 1; + } + + node->id = strtoul(argv[NODE_ARG_ID], NULL, 10); + if (!node->id) { + node->id = strtoul(argv[NODE_ARG_ID], NULL, 16); + if (!node->id) + return 1; + } + + node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 10); + if (!node->wide) { + node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 16); + if (!node->wide) + return 1; + } + + node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 10); + if (!node->ingr_id) { + node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 16); + if (!node->ingr_id) + return 1; + } + + node->ingr_wide = strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 10); + if (!node->ingr_wide) { + node->ingr_wide = strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 16); + if (!node->ingr_wide) + return 1; + } + + node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 10); + if (!node->egr_id) { + node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 16); + if (!node->egr_id) + return 1; + } + + node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 10); + if (!node->egr_wide) { + node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 16); + if (!node->egr_wide) + return 1; + } + + node->ns_data = strtoul(argv[NODE_ARG_NS_DATA], NULL, 16); + if (!node->ns_data) + return 1; + + node->ns_wide = strtoull(argv[NODE_ARG_NS_WIDE], NULL, 16); + if (!node->ns_wide) + return 1; + + node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 10); + if (!node->sc_id) { + node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 16); + if (!node->sc_id) + return 1; + } + + *argcp -= NODE_ARGS_SIZE; 
+ *argvp += NODE_ARGS_SIZE; + + if (node->sc_id != 0xffffff) { + if (!*argcp) + return 1; + + node->sc_data = argv[NODE_ARG_SC_ID + 1]; + + *argcp -= 1; + *argvp += 1; + } + + return 0; +} + +struct args *parse_args(int argc, char **argv) +{ + struct args *args; + int n_node, i; + + if (argc < ARGS_SIZE) + goto out; + + n_node = strtoul(argv[ARG_N_NODE], NULL, 10); + if (!n_node || n_node > 10) + goto out; + + args = calloc(1, sizeof(*args) + n_node * sizeof(struct node_args)); + if (!args) + goto out; + + args->ns_id = strtoul(argv[ARG_NS_ID], NULL, 10); + if (!args->ns_id) + goto free; + + args->trace_type = strtoul(argv[ARG_TRACE_TYPE], NULL, 16); + if (!args->trace_type) + goto free; + + args->n_node = n_node; + args->ifname = argv[ARG_IFNAME]; + + argv += ARGS_SIZE; + argc -= ARGS_SIZE; + + for (i = 0; i < n_node; i++) { + if (parse_node_args(&argc, &argv, &args->node[i])) + goto free; + } + + if (argc) + goto free; + + return args; +free: + free(args); +out: + return NULL; +} + +int main(int argc, char **argv) +{ + int ret, fd, pkts, size, hoplen, found; + struct ioam6_trace_hdr *ioam6h; + struct ioam6_hdr *opt; + struct ipv6hdr *ip6h; + __u8 buffer[400], *p; + struct args *args; + + args = parse_args(argc - 1, argv + 1); + if (!args) { + ret = 1; + goto out; + } + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); + if (!fd) { + ret = 1; + goto out; + } + + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + args->ifname, strlen(args->ifname))) { + ret = 1; + goto close; + } + + pkts = 0; + found = 0; + while (pkts < 3 && !found) { + size = recv(fd, buffer, sizeof(buffer), 0); + ip6h = (struct ipv6hdr *)buffer; + pkts++; + + if (ip6h->nexthdr == IPPROTO_HOPOPTS) { + p = buffer + sizeof(*ip6h); + hoplen = (p[1] + 1) << 3; + + p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)p; + + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + found = 1; + + p += sizeof(*opt); + ioam6h = (struct ioam6_trace_hdr *)p; + + ret = check_ioam6_trace(ioam6h, args); + break; + } + + p += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; + } + } + } + + if (!found) + ret = 1; +close: + close(fd); +out: + free(args); + return ret; +} diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 170be65e0816..1cbfeb5052ec 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (expected to fail)" -(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") - echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh new file mode 100755 index 000000000000..0a8ad97b07ea --- /dev/null +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +netdev_check_for_carrier() { + local -r dev="$1" + + for i in {1..5}; do + carrier="$(cat /sys/class/net/${dev}/carrier)" + if [[ "${carrier}" -ne 1 ]] ; then + echo "carrier not ready yet..." 
>&2 + sleep 1 + else + echo "carrier ready" >&2 + break + fi + done + echo "${carrier}" +} + +# Assumes that there is no existing ipvlan device on the physical device +setup_loopback_environment() { + local dev="$1" + + # Fail hard if cannot turn on loopback mode for current NIC + ethtool -K "${dev}" loopback on || exit 1 + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} + +setup_macvlan_ns(){ + local -r link_dev="$1" + local -r ns_name="$2" + local -r ns_dev="$3" + local -r ns_mac="$4" + local -r addr="$5" + + ip link add link "${link_dev}" dev "${ns_dev}" \ + address "${ns_mac}" type macvlan + exit_code=$? + if [[ "${exit_code}" -ne 0 ]]; then + echo "setup_macvlan_ns failed" + exit $exit_code + fi + + [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" + ip link set dev "${ns_dev}" netns "${ns_name}" + ip -netns "${ns_name}" link set dev "${ns_dev}" up + if [[ -n "${addr}" ]]; then + ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" + fi + + sleep 1 +} + +cleanup_macvlan_ns(){ + while (( $# >= 2 )); do + ns_name="$1" + ns_dev="$2" + ip -netns "${ns_name}" link del dev "${ns_dev}" + ip netns del "${ns_name}" + shift 2 + done +} + +cleanup_loopback(){ + local -r dev="$1" + + ethtool -K "${dev}" loopback off + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c new file mode 100644 index 000000000000..710ac956bdb3 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. 
Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". 
error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static void recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + recv_block(&rings[i]); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 2; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + 
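As an aside on the toeplitz() helper above: it maintains a 32-bit key window (key32) that slides left by one bit per input bit, XORing the current window into the hash whenever the corresponding input bit is set. The self-contained copy of that loop below feeds it the test's own IPv4 addressing (192.168.1.100 to 192.168.1.200, port 8000 from toeplitz.sh); the all-0x6d key and the source port are arbitrary placeholders, not the netdev_rss_key the script reads.

	/* Standalone demo of the sliding-window Toeplitz loop used by
	 * toeplitz() above; key and source port are made-up examples.
	 */
	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static uint32_t toeplitz_demo(const unsigned char *input, int len,
				      const unsigned char *key)
	{
		uint32_t key32 = ntohl(*(const uint32_t *)key);
		uint32_t ret = 0;
		int i, bit;

		key += 4;
		for (i = 0; i < len; i++) {
			for (bit = 7; bit >= 0; bit--) {
				/* XOR in the current 32-bit window for
				 * every set input bit ...
				 */
				if (input[i] & (1 << bit))
					ret ^= key32;
				/* ... then slide the window left,
				 * pulling the next key bit in on the
				 * right.
				 */
				key32 <<= 1;
				key32 |= !!(key[0] & (1 << bit));
			}
			key++;
		}
		return ret;
	}

	int main(void)
	{
		/* 40-byte key as for IPv4; all 0x6d only for brevity */
		unsigned char key[40];
		/* saddr | daddr | sport | dport, network byte order */
		unsigned char tuple[12] = {
			192, 168, 1, 100,	/* 192.168.1.100 */
			192, 168, 1, 200,	/* 192.168.1.200 */
			0x1f, 0x40,		/* example sport 8000 */
			0x1f, 0x40,		/* dport 8000 */
		};

		memset(key, 0x6d, sizeof(key));
		/* Zero padding out to the test's fixed-size buffer would
		 * not change the result: zero input bits never XOR the
		 * window into the hash.
		 */
		printf("rx_hash 0x%08x\n",
		       toeplitz_demo(tuple, sizeof(tuple), key));
		return 0;
	}

The companion verify_rps() mapping, (rx_hash * cfg_num_rps_cpus) >> 32, is the usual multiply-shift trick for scaling a 32-bit hash uniformly onto [0, n) without a modulo.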
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. 
+ */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + 
cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + error(1, 0, "Must supply rss key ('-k')"); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh new file mode 100755 index 000000000000..0a49907cd4fe --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping +# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu +# ('-rps <rps_map>') +# +# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, +# which is a driver-specific encoding. +# +# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ +# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] + +source setup_loopback.sh +readonly SERVER_IP4="192.168.1.200/24" +readonly SERVER_IP6="fda8::1/64" +readonly SERVER_MAC="aa:00:00:00:00:02" + +readonly CLIENT_IP4="192.168.1.100/24" +readonly CLIENT_IP6="fda8::2/64" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +PORT=8000 +KEY="$(</proc/sys/net/core/netdev_rss_key)" +TEST_RSS=false +RPS_MAP="" +PROTO_FLAG="" +IP_FLAG="" +DEV="eth0" + +# Return the number of rxqs among which RSS is configured to spread packets. +# This is determined by reading the RSS indirection table using ethtool. +get_rss_cfg_num_rxqs() { + echo $(ethtool -x "${DEV}" | + egrep [[:space:]]+[0-9]+:[[:space:]]+ | + cut -d: -f2- | + awk '{$1=$1};1' | + tr ' ' '\n' | + sort -u | + wc -l) +} + +# Return a list of the receive irq handler cpus. +# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. +# Reads /sys/kernel/irq/ in order, so algorithm depends on +# irq_{rxq-0} < irq_{rxq-1}, etc. +get_rx_irq_cpus() { + CPUS="" + # sort so that irq 2 is read before irq 10 + SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) + # Consider only as many queues as RSS actually uses. We assume that + # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). 
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) + RXQ_COUNT=0 + + for i in ${SORTED_IRQS} + do + [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break + # lookup relevant IRQs by action name + [[ -e "$i/actions" ]] || continue + cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue + irqname=$(<"$i/actions") + + # does the IRQ get called + irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') + [[ -n "${irqcount}" ]] || continue + + # lookup CPU + irq=$(basename "$i") + cpu=$(cat "/proc/irq/$irq/smp_affinity_list") + + if [[ -z "${CPUS}" ]]; then + CPUS="${cpu}" + else + CPUS="${CPUS},${cpu}" + fi + RXQ_COUNT=$((RXQ_COUNT+1)) + done + + echo "${CPUS}" +} + +get_disable_rfs_cmd() { + echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" +} + +get_set_rps_bitmaps_cmd() { + CMD="" + for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus + do + CMD="${CMD} echo $1 > ${i};" + done + + echo "${CMD}" +} + +get_disable_rps_cmd() { + echo "$(get_set_rps_bitmaps_cmd 0)" +} + +die() { + echo "$1" + exit 1 +} + +check_nic_rxhash_enabled() { + local -r pattern="receive-hashing:\ on" + + ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" +} + +parse_opts() { + local prog=$0 + shift 1 + + while [[ "$1" =~ "-" ]]; do + if [[ "$1" = "-irq_prefix" ]]; then + shift + IRQ_PATTERN="^$1-[0-9]*$" + elif [[ "$1" = "-u" || "$1" = "-t" ]]; then + PROTO_FLAG="$1" + elif [[ "$1" = "-4" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP4}" + CLIENT_IP="${CLIENT_IP4}" + elif [[ "$1" = "-6" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP6}" + CLIENT_IP="${CLIENT_IP6}" + elif [[ "$1" = "-rss" ]]; then + TEST_RSS=true + elif [[ "$1" = "-rps" ]]; then + shift + RPS_MAP="$1" + elif [[ "$1" = "-i" ]]; then + shift + DEV="$1" + else + die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ + [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" + fi + shift + done +} + +setup() { + setup_loopback_environment "${DEV}" + + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${DEV}" server_ns server \ + "${SERVER_MAC}" "${SERVER_IP}" + setup_macvlan_ns "${DEV}" client_ns client \ + "${CLIENT_MAC}" "${CLIENT_IP}" +} + +cleanup() { + cleanup_macvlan_ns server_ns server client_ns client + cleanup_loopback "${DEV}" +} + +parse_opts $0 $@ + +setup +trap cleanup EXIT + +check_nic_rxhash_enabled + +# Actual test starts here +if [[ "${TEST_RSS}" = true ]]; then + # RPS/RFS must be disabled because they move packets between cpus, + # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. + eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -C "$(get_rx_irq_cpus)" -s -v & +elif [[ ! -z "${RPS_MAP}" ]]; then + eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -r "0x${RPS_MAP}" -s -v & +else + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & +fi + +server_pid=$! + +ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ + "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & + +client_pid=$! + +wait "${server_pid}" +exit_code=$? +kill -9 "${client_pid}" +if [[ "${exit_code}" -eq 0 ]]; then + echo "Test Succeeded!" 
+fi +exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh new file mode 100755 index 000000000000..2fef34f4aba1 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz_client.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A simple program for generating traffic for the toeplitz test. +# +# This program sends packets periodically for, conservatively, 20 seconds. The +# intent is for the calling program to kill this program once it is no longer +# needed, rather than waiting for the 20 second expiration. + +send_traffic() { + expiration=$((SECONDS+20)) + while [[ "${SECONDS}" -lt "${expiration}" ]] + do + if [[ "${PROTO}" == "-u" ]]; then + echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" + else + echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" + fi + sleep 0.001 + done +} + +PROTO=$1 +IPVER=$2 +ADDR=$3 +PORT=$4 + +send_traffic diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 11d7cdb898c0..19eac3e44c06 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST readonly BM_NET_V4=192.168.1. readonly BM_NET_V6=2001:db8:: -readonly NPROCS=`nproc` +readonly CPUS=`nproc` ret=0 cleanup() { @@ -75,6 +75,29 @@ chk_tso_flag() { __chk_flag "$1" $2 $3 tcp-segmentation-offload } +chk_channels() { + local msg="$1" + local target=$2 + local rx=$3 + local tx=$4 + + local dev=veth$target + + local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep RX: | tail -n 1 | awk '{print $2}' ` + local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep TX: | tail -n 1 | awk '{print $2}'` + local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\ + grep Combined: | tail -n 1 | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then + echo " ok " + else + echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined" + fi +} + chk_gro() { local msg="$1" local expected=$2 @@ -107,11 +130,100 @@ chk_gro() { fi } +__change_channels() +{ + local cur_cpu + local end=$1 + local cur + local i + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + for i in `seq 1 $CPUS`; do + ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i + ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i + done + + for i in `seq 1 $((CPUS - 1))`; do + cur_cpu=$((CPUS - $i)) + ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu + ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu + done + done +} + +__send_data() { + local end=$1 + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST + done +} + +do_stress() { + local end + printf -v end '%(%s)T' + end=$((end + $STRESS)) + + ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3 + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + + ip netns exec $NS_DST ./udpgso_bench_rx & + local rx_pid=$! + + echo "Running stress test for $STRESS seconds..." + __change_channels $end & + local ch_pid=$! + __send_data $end & + local data_pid_1=$! + __send_data $end & + local data_pid_2=$! + __send_data $end & + local data_pid_3=$! + __send_data $end & + local data_pid_4=$! 
+ + wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4 + kill -9 $rx_pid + echo "done" + + # restore previous setting + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1 +} + +usage() { + echo "Usage: $0 [-h] [-s <seconds>]" + echo -e "\t-h: show this help" + echo -e "\t-s: run optional stress tests for the given amount of seconds" +} + +STRESS=0 +while getopts "hs:" option; do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "s") + STRESS=$OPTARG + ;; + esac +done + if [ ! -f ../bpf/xdp_dummy.o ]; then echo "Missing xdp_dummy helper. Build bpf selftest first" exit 1 fi +[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped" +[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too" + create_ns chk_gro_flag "default - gro flag" $SRC off chk_gro_flag " - peer gro flag" $DST off @@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns +chk_channels "default channels" $DST 1 1 + ip -n $NS_DST link set dev veth$DST down ip netns exec $NS_DST ethtool -K veth$DST gro on chk_gro_flag "with gro enabled on link down - gro flag" $DST on @@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns + +CUR_TX=1 +CUR_RX=1 +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "setting tx channels" $DST 1 2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + chk_channels "setting both rx and tx channels" $DST 3 3 + CUR_RX=3 + CUR_TX=3 +fi + +ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null +chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX + +ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null +chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX + +if [ $CPUS -gt 1 ]; then + # this also tests queues nr reduction + ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null + ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null + printf "%-60s" "bad setting: XDP with RX nr less than TX" + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + section xdp_dummy 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + + # the following tests will run with multiple channels active + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + section xdp_dummy 2>/dev/null + printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" + ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + CUR_RX=2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set" + ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\ + echo "fail - set operation successful ?!?" 
|| echo " ok " + chk_channels "setting invalid channels nr" $DST 2 2 +fi + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off @@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC on chk_tso_flag " - peer tso flag" $DST on + +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 1 + chk_channels "decreasing tx channels with device down" $DST 2 1 +fi + ip -n $NS_DST link set dev veth$DST up ip -n $NS_SRC link set dev veth$SRC up chk_gro " - aggregation" 1 +if [ $CPUS -gt 1 ]; then + [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress + + ip -n $NS_DST link set dev veth$DST down + ip -n $NS_SRC link set dev veth$SRC down + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "increasing tx channels with device down" $DST 2 2 + ip -n $NS_DST link set dev veth$DST up + ip -n $NS_SRC link set dev veth$SRC up +fi + ip netns exec $NS_DST ethtool -K veth$DST gro off ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off chk_gro "aggregation again with default and TSO off" 10 diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index 6eb4c4f97060..742f2290973e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -417,5 +417,29 @@ "teardown": [ "$TC actions flush action skbmod" ] + }, + { + "id": "fe09", + "name": "Add skbmod action to mark ECN bits", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod ecn", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 1", + "matchPattern": "action order [0-9]*: skbmod pipe ecn", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json new file mode 100644 index 000000000000..88a20c781e49 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -0,0 +1,137 @@ +[ + { + "id": "ce7d", + "name": "Add mq Qdisc to multi-queue device (4 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "2f82", + "name": "Add mq Qdisc to multi-queue device (256 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 256\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "256", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "c525", 
+ "name": "Add duplicate mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "128a", + "name": "Delete nonexistent mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "03a9", + "name": "Delete mq Qdisc twice", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq", + "$TC qdisc del dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "be0f", + "name": "Add mq Qdisc to single-queue device", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index cd4a27ee1466..ea04f04c173e 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,6 +17,7 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'DUMMY': 'dummy1', + 'ETH': 'eth0', 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', # Length of time in seconds to wait before terminating a command |