diff options
349 files changed, 10330 insertions, 3934 deletions
diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index a78bf036cadd..4d86780ab0f1 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel C-based implementation can export. Notable example is single VLAN (802.1Q) and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` for a set of information that's currently can be exported from the BPF context. + +When BPF flow dissector is attached to the root network namespace (machine-wide +policy), users can't override it in their child network namespaces. diff --git a/Documentation/devicetree/bindings/net/ftgmac100.txt b/Documentation/devicetree/bindings/net/ftgmac100.txt index 72e7aaf7242e..f878c1103463 100644 --- a/Documentation/devicetree/bindings/net/ftgmac100.txt +++ b/Documentation/devicetree/bindings/net/ftgmac100.txt @@ -9,6 +9,7 @@ Required properties: - "aspeed,ast2400-mac" - "aspeed,ast2500-mac" + - "aspeed,ast2600-mac" - reg: Address and length of the register set for the device - interrupts: Should contain ethernet controller interrupt @@ -23,6 +24,13 @@ Optional properties: - no-hw-checksum: Used to disable HW checksum support. Here for backward compatibility as the driver now should have correct defaults based on the SoC. +- clocks: In accordance with the generic clock bindings. Must describe the MAC + IP clock, and optionally an RMII RCLK gate for the AST2500/AST2600. The + required MAC clock must be the first cell. +- clock-names: + + - "MACCLK": The MAC IP clock + - "RCLK": Clock gate for the RMII RCLK Example: diff --git a/Documentation/devicetree/bindings/net/lpc-eth.txt b/Documentation/devicetree/bindings/net/lpc-eth.txt index b92e927808b6..cfe0e5991d46 100644 --- a/Documentation/devicetree/bindings/net/lpc-eth.txt +++ b/Documentation/devicetree/bindings/net/lpc-eth.txt @@ -10,6 +10,11 @@ Optional properties: absent, "rmii" is assumed. - use-iram: Use LPC32xx internal SRAM (IRAM) for DMA buffering +Optional subnodes: +- mdio : specifies the mdio bus, used as a container for phy nodes according to + phy.txt in the same directory + + Example: mac: ethernet@31060000 { diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml new file mode 100644 index 000000000000..7f84df9790e2 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,ether.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Electronics SH EtherMAC + +allOf: + - $ref: ethernet-controller.yaml# + +maintainers: + - Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> + +properties: + compatible: + oneOf: + - items: + - enum: + - renesas,gether-r8a7740 # device is a part of R8A7740 SoC + - renesas,gether-r8a77980 # device is a part of R8A77980 SoC + - renesas,ether-r7s72100 # device is a part of R7S72100 SoC + - renesas,ether-r7s9210 # device is a part of R7S9210 SoC + - items: + - enum: + - renesas,ether-r8a7778 # device is a part of R8A7778 SoC + - renesas,ether-r8a7779 # device is a part of R8A7779 SoC + - enum: + - renesas,rcar-gen1-ether # a generic R-Car Gen1 device + - items: + - enum: + - renesas,ether-r8a7745 # device is a part of R8A7745 SoC + - renesas,ether-r8a7743 # device is a part of R8A7743 SoC + - renesas,ether-r8a7790 # device is a part of R8A7790 SoC + - renesas,ether-r8a7791 # device is a part of R8A7791 SoC + - renesas,ether-r8a7793 # device is a part of R8A7793 SoC + - renesas,ether-r8a7794 # device is a part of R8A7794 SoC + - enum: + - renesas,rcar-gen2-ether # a generic R-Car Gen2 or RZ/G1 device + + reg: + items: + - description: E-DMAC/feLic registers + - description: TSU registers + minItems: 1 + + interrupts: + maxItems: 1 + + '#address-cells': + description: number of address cells for the MDIO bus + const: 1 + + '#size-cells': + description: number of size cells on the MDIO bus + const: 0 + + clocks: + maxItems: 1 + + pinctrl-0: true + + pinctrl-names: true + + renesas,no-ether-link: + type: boolean + description: + specify when a board does not provide a proper Ether LINK signal + + renesas,ether-link-active-low: + type: boolean + description: + specify when the Ether LINK signal is active-low instead of normal + active-high + +required: + - compatible + - reg + - interrupts + - phy-mode + - phy-handle + - '#address-cells' + - '#size-cells' + - clocks + - pinctrl-0 + +examples: + # Lager board + - | + #include <dt-bindings/clock/r8a7790-clock.h> + #include <dt-bindings/interrupt-controller/irq.h> + + ethernet@ee700000 { + compatible = "renesas,ether-r8a7790", "renesas,rcar-gen2-ether"; + reg = <0 0xee700000 0 0x400>; + interrupt-parent = <&gic>; + interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&mstp8_clks R8A7790_CLK_ETHER>; + phy-mode = "rmii"; + phy-handle = <&phy1>; + pinctrl-0 = <ðer_pins>; + pinctrl-names = "default"; + renesas,ether-link-active-low; + #address-cells = <1>; + #size-cells = <0>; + + phy1: ethernet-phy@1 { + reg = <1>; + interrupt-parent = <&irqc0>; + interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + pinctrl-0 = <&phy1_pins>; + pinctrl-names = "default"; + }; + }; diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt deleted file mode 100644 index abc36274227c..000000000000 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ /dev/null @@ -1,69 +0,0 @@ -* Renesas Electronics SH EtherMAC - -This file provides information on what the device node for the SH EtherMAC -interface contains. - -Required properties: -- compatible: Must contain one or more of the following: - "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. - "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. - "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. - "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. - "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. - "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. - "renesas,ether-r8a7791" if the device is a part of R8A7791 SoC. - "renesas,ether-r8a7793" if the device is a part of R8A7793 SoC. - "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. - "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC. - "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. - "renesas,ether-r7s9210" if the device is a part of R7S9210 SoC. - "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device. - "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1 - device. - - When compatible with the generic version, nodes must list - the SoC-specific version corresponding to the platform - first followed by the generic version. - -- reg: offset and length of (1) the E-DMAC/feLic register block (required), - (2) the TSU register block (optional). -- interrupts: interrupt specifier for the sole interrupt. -- phy-mode: see ethernet.txt file in the same directory. -- phy-handle: see ethernet.txt file in the same directory. -- #address-cells: number of address cells for the MDIO bus, must be equal to 1. -- #size-cells: number of size cells on the MDIO bus, must be equal to 0. -- clocks: clock phandle and specifier pair. -- pinctrl-0: phandle, referring to a default pin configuration node. - -Optional properties: -- pinctrl-names: pin configuration state name ("default"). -- renesas,no-ether-link: boolean, specify when a board does not provide a proper - Ether LINK signal. -- renesas,ether-link-active-low: boolean, specify when the Ether LINK signal is - active-low instead of normal active-high. - -Example (Lager board): - - ethernet@ee700000 { - compatible = "renesas,ether-r8a7790", - "renesas,rcar-gen2-ether"; - reg = <0 0xee700000 0 0x400>; - interrupt-parent = <&gic>; - interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&mstp8_clks R8A7790_CLK_ETHER>; - phy-mode = "rmii"; - phy-handle = <&phy1>; - pinctrl-0 = <ðer_pins>; - pinctrl-names = "default"; - renesas,ether-link-active-low; - #address-cells = <1>; - #size-cells = <0>; - - phy1: ethernet-phy@1 { - reg = <1>; - interrupt-parent = <&irqc0>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - pinctrl-0 = <&phy1_pins>; - pinctrl-names = "default"; - }; - }; diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index 5bcbf75e2025..8cb2cd4e2a80 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -213,3 +213,29 @@ A patchset to OpenSSL to use ktls as the record layer is of calling send directly after a handshake using gnutls. Since it doesn't implement a full record layer, control messages are not supported. + +Statistics +========== + +TLS implementation exposes the following per-namespace statistics +(``/proc/net/tls_stat``): + +- ``TlsCurrTxSw``, ``TlsCurrRxSw`` - + number of TX and RX sessions currently installed where host handles + cryptography + +- ``TlsCurrTxDevice``, ``TlsCurrRxDevice`` - + number of TX and RX sessions currently installed where NIC handles + cryptography + +- ``TlsTxSw``, ``TlsRxSw`` - + number of TX and RX sessions opened with host cryptography + +- ``TlsTxDevice``, ``TlsRxDevice`` - + number of TX and RX sessions opened with NIC cryptography + +- ``TlsDecryptError`` - + record decryption failed (e.g. due to incorrect authentication tag) + +- ``TlsDeviceRxResync`` - + number of RX resyncs sent to NICs handling cryptography diff --git a/MAINTAINERS b/MAINTAINERS index e51a68bf8ca8..aaa6ee71c000 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13815,7 +13815,7 @@ R: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org F: Documentation/devicetree/bindings/net/renesas,*.txt -F: Documentation/devicetree/bindings/net/sh_eth.txt +F: Documentation/devicetree/bindings/net/renesas,*.yaml F: drivers/net/ethernet/renesas/ F: include/linux/sh_eth.h diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 991549a1c5f3..3ad2ba1ad855 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -909,6 +909,16 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: + /* test dst_reg, dst_reg to save one extra byte */ + if (imm32 == 0) { + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_2mod(0x48, dst_reg, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); + goto emit_cond_jmp; + } + /* cmp dst_reg, imm8/32 */ if (BPF_CLASS(insn->code) == BPF_JMP) EMIT1(add_1mod(0x48, dst_reg)); diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 250150560e68..91e424378217 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -35,7 +35,7 @@ config CHELSIO_IPSEC_INLINE config CRYPTO_DEV_CHELSIO_TLS tristate "Chelsio Crypto Inline TLS Driver" depends on CHELSIO_T4 - depends on TLS + depends on TLS_TOE select CRYPTO_DEV_CHELSIO ---help--- Support Chelsio Inline TLS with Chelsio crypto accelerator. diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index 025c831d0899..d2bc655ab931 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -21,6 +21,7 @@ #include <crypto/internal/hash.h> #include <linux/tls.h> #include <net/tls.h> +#include <net/tls_toe.h> #include "t4fw_api.h" #include "t4_msg.h" @@ -118,7 +119,7 @@ struct tls_scmd { }; struct chtls_dev { - struct tls_device tlsdev; + struct tls_toe_device tlsdev; struct list_head list; struct cxgb4_lld_info *lldi; struct pci_dev *pdev; @@ -362,7 +363,7 @@ enum { #define TCP_PAGE(sk) (sk->sk_frag.page) #define TCP_OFF(sk) (sk->sk_frag.offset) -static inline struct chtls_dev *to_chtls_dev(struct tls_device *tlsdev) +static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev) { return container_of(tlsdev, struct chtls_dev, tlsdev); } diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index e6df5b95ed47..18996935d8ba 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -124,7 +124,7 @@ static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk) mutex_unlock(¬ify_mutex); } -static int chtls_inline_feature(struct tls_device *dev) +static int chtls_inline_feature(struct tls_toe_device *dev) { struct net_device *netdev; struct chtls_dev *cdev; @@ -140,7 +140,7 @@ static int chtls_inline_feature(struct tls_device *dev) return 0; } -static int chtls_create_hash(struct tls_device *dev, struct sock *sk) +static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk) { struct chtls_dev *cdev = to_chtls_dev(dev); @@ -149,7 +149,7 @@ static int chtls_create_hash(struct tls_device *dev, struct sock *sk) return 0; } -static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk) +static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk) { struct chtls_dev *cdev = to_chtls_dev(dev); @@ -161,7 +161,7 @@ static void chtls_free_uld(struct chtls_dev *cdev) { int i; - tls_unregister_device(&cdev->tlsdev); + tls_toe_unregister_device(&cdev->tlsdev); kvfree(cdev->kmap.addr); idr_destroy(&cdev->hwtid_idr); for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) @@ -173,27 +173,27 @@ static void chtls_free_uld(struct chtls_dev *cdev) static inline void chtls_dev_release(struct kref *kref) { + struct tls_toe_device *dev; struct chtls_dev *cdev; - struct tls_device *dev; - dev = container_of(kref, struct tls_device, kref); + dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); chtls_free_uld(cdev); } static void chtls_register_dev(struct chtls_dev *cdev) { - struct tls_device *tlsdev = &cdev->tlsdev; + struct tls_toe_device *tlsdev = &cdev->tlsdev; - strlcpy(tlsdev->name, "chtls", TLS_DEVICE_NAME_MAX); + strlcpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX); strlcat(tlsdev->name, cdev->lldi->ports[0]->name, - TLS_DEVICE_NAME_MAX); + TLS_TOE_DEVICE_NAME_MAX); tlsdev->feature = chtls_inline_feature; tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; tlsdev->release = chtls_dev_release; kref_init(&tlsdev->kref); - tls_register_device(tlsdev); + tls_toe_register_device(tlsdev); cdev->cdev_state = CHTLS_CDEV_STATE_UP; } diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 2b9a2f117113..96d7cef3289f 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -3,7 +3,13 @@ # CAIF physical drivers # -comment "CAIF transport drivers" +menuconfig CAIF_DRIVERS + bool "CAIF transport drivers" + depends on CAIF + help + Enable this to see CAIF physical drivers. + +if CAIF_DRIVERS config CAIF_TTY tristate "CAIF TTY transport driver" @@ -22,7 +28,7 @@ config CAIF_SPI_SLAVE The CAIF Link layer SPI Protocol driver for Slave SPI interface. This driver implements a platform driver to accommodate for a platform specific SPI device. A sample CAIF SPI Platform device is - provided in Documentation/networking/caif/spi_porting.txt + provided in <file:Documentation/networking/caif/spi_porting.txt>. config CAIF_SPI_SYNC bool "Next command and length in start of frame" @@ -38,7 +44,7 @@ config CAIF_HSI depends on CAIF default n ---help--- - The caif low level driver for CAIF over HSI. + The CAIF low level driver for CAIF over HSI. Be aware that if you enable this then you also need to enable a low-level HSI driver. @@ -50,8 +56,10 @@ config CAIF_VIRTIO select GENERIC_ALLOCATOR default n ---help--- - The caif driver for CAIF over Virtio. + The CAIF driver for CAIF over Virtio. if CAIF_VIRTIO source "drivers/vhost/Kconfig.vringh" endif + +endif # CAIF_DRIVERS diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index fdffd9e0c518..7d050fab0889 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -87,7 +87,6 @@ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); static struct i2c_driver ksz9477_i2c_driver = { .driver = { .name = "ksz9477-switch", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, .probe = ksz9477_i2c_probe, diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index fbb564c3beb8..91063ed3ef1b 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -21,6 +21,7 @@ #define SJA1105_AGEING_TIME_MS(ms) ((ms) / 10) #include "sja1105_tas.h" +#include "sja1105_ptp.h" /* Keeps the different addresses between E/T and P/Q/R/S */ struct sja1105_regs { @@ -32,9 +33,8 @@ struct sja1105_regs { u64 config; u64 rmii_pll1; u64 ptp_control; - u64 ptpclk; + u64 ptpclkval; u64 ptpclkrate; - u64 ptptsclk; u64 ptpegr_ts[SJA1105_NUM_PORTS]; u64 pad_mii_tx[SJA1105_NUM_PORTS]; u64 pad_mii_id[SJA1105_NUM_PORTS]; @@ -71,7 +71,8 @@ struct sja1105_info { const struct sja1105_dynamic_table_ops *dyn_ops; const struct sja1105_table_ops *static_ops; const struct sja1105_regs *regs; - int (*ptp_cmd)(const void *ctx, const void *data); + int (*ptp_cmd)(const struct dsa_switch *ds, + const struct sja1105_ptp_cmd *cmd); int (*reset_cmd)(const void *ctx, const void *data); int (*setup_rgmii_delay)(const void *ctx, int port); /* Prototypes from include/net/dsa.h */ @@ -91,26 +92,16 @@ struct sja1105_private { struct spi_device *spidev; struct dsa_switch *ds; struct sja1105_port ports[SJA1105_NUM_PORTS]; - struct ptp_clock_info ptp_caps; - struct ptp_clock *clock; - /* The cycle counter translates the PTP timestamps (based on - * a free-running counter) into a software time domain. - */ - struct cyclecounter tstamp_cc; - struct timecounter tstamp_tc; - struct delayed_work refresh_work; - /* Serializes all operations on the cycle counter */ - struct mutex ptp_lock; /* Serializes transmission of management frames so that * the switch doesn't confuse them with one another. */ struct mutex mgmt_lock; struct sja1105_tagger_data tagger_data; + struct sja1105_ptp_data ptp_data; struct sja1105_tas_data tas_data; }; #include "sja1105_dynamic_config.h" -#include "sja1105_ptp.h" struct sja1105_spi_message { u64 access; @@ -127,15 +118,13 @@ typedef enum { int sja1105_static_config_reload(struct sja1105_private *priv); /* From sja1105_spi.c */ -int sja1105_spi_send_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - void *packed_buf, size_t size_bytes); -int sja1105_spi_send_int(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - u64 *value, u64 size_bytes); -int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 base_addr, - void *packed_buf, u64 buf_len); +int sja1105_xfer_buf(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, + u8 *buf, size_t len); +int sja1105_xfer_u32(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value); +int sja1105_xfer_u64(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value); int sja1105_static_config_upload(struct sja1105_private *priv); int sja1105_inhibit_tx(const struct sja1105_private *priv, unsigned long port_bitmap, bool tx_inhibited); diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 608126a15d72..9082e52b55e9 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -118,9 +118,8 @@ static int sja1105_cgu_idiv_config(struct sja1105_private *priv, int port, idiv.pd = enabled ? 0 : 1; /* Power down? */ sja1105_cgu_idiv_packing(packed_buf, &idiv, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->cgu_idiv[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->cgu_idiv[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static void @@ -167,9 +166,8 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv, mii_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_tx_clk[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -192,9 +190,8 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port) mii_rx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_rx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_rx_clk[port], packed_buf, - SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_rx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -217,9 +214,8 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port) mii_ext_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_ext_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -242,9 +238,8 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port) mii_ext_rx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &mii_ext_rx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->mii_ext_rx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->mii_ext_rx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_mii_clocking_setup(struct sja1105_private *priv, int port, @@ -337,9 +332,8 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv, txc.pd = 0; sja1105_cgu_mii_control_packing(packed_buf, &txc, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rgmii_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgmii_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } /* AGU */ @@ -383,9 +377,8 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv, pad_mii_tx.clk_ipud = 2; /* TX_CLK input stage (default) */ sja1105_cfg_pad_mii_tx_packing(packed_buf, &pad_mii_tx, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_tx[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_tx[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static void @@ -405,7 +398,7 @@ sja1105_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd, } /* Valid range in degrees is an integer between 73.8 and 101.7 */ -static inline u64 sja1105_rgmii_delay(u64 phase) +static u64 sja1105_rgmii_delay(u64 phase) { /* UM11040.pdf: The delay in degree phase is 73.8 + delay_tune * 0.9. * To avoid floating point operations we'll multiply by 10 @@ -442,9 +435,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) pad_mii_id.txc_pd = 1; sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_id[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port], + packed_buf, SJA1105_SIZE_CGU_CMD); if (rc < 0) return rc; @@ -459,9 +451,8 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) } sja1105_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->pad_mii_id[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port, @@ -547,9 +538,8 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv, ref_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &ref_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rmii_ref_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ref_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int @@ -565,9 +555,8 @@ sja1105_cgu_rmii_ext_tx_clk_config(struct sja1105_private *priv, int port) ext_tx_clk.pd = 0; /* Power Down off => enabled */ sja1105_cgu_mii_control_packing(packed_buf, &ext_tx_clk, PACK); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, - regs->rmii_ext_tx_clk[port], - packed_buf, SJA1105_SIZE_CGU_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_ext_tx_clk[port], + packed_buf, SJA1105_SIZE_CGU_CMD); } static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) @@ -595,8 +584,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) pll.pd = 0x1; sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1, - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf, + SJA1105_SIZE_CGU_CMD); if (rc < 0) { dev_err(dev, "failed to configure PLL1 for 50MHz\n"); return rc; @@ -606,8 +595,8 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv) pll.pd = 0x0; sja1105_cgu_pll_control_packing(packed_buf, &pll, PACK); - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rmii_pll1, - packed_buf, SJA1105_SIZE_CGU_CMD); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->rmii_pll1, packed_buf, + SJA1105_SIZE_CGU_CMD); if (rc < 0) { dev_err(dev, "failed to enable PLL1\n"); return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 91da430045ff..25381bd65ed7 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -686,8 +686,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv, ops->entry_packing(packed_buf, entry, PACK); /* Send SPI write operation: read config table entry */ - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; @@ -698,8 +698,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv, memset(packed_buf, 0, ops->packed_size); /* Retrieve the read operation's result */ - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; @@ -771,8 +771,8 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv, ops->entry_packing(packed_buf, entry, PACK); /* Send SPI write operation: read config table entry */ - rc = sja1105_spi_send_packed_buf(priv, SPI_WRITE, ops->addr, - packed_buf, ops->packed_size); + rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf, + ops->packed_size); if (rc < 0) return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c index ab581a28cd41..064301cc7d5b 100644 --- a/drivers/net/dsa/sja1105/sja1105_ethtool.c +++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c @@ -167,8 +167,8 @@ static int sja1105_port_status_get_mac(struct sja1105_private *priv, int rc; /* MAC area */ - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac[port], - packed_buf, SJA1105_SIZE_MAC_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac[port], packed_buf, + SJA1105_SIZE_MAC_AREA); if (rc < 0) return rc; @@ -185,8 +185,8 @@ static int sja1105_port_status_get_hl1(struct sja1105_private *priv, u8 packed_buf[SJA1105_SIZE_HL1_AREA] = {0}; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl1[port], - packed_buf, SJA1105_SIZE_HL1_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl1[port], packed_buf, + SJA1105_SIZE_HL1_AREA); if (rc < 0) return rc; @@ -203,8 +203,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv, u8 packed_buf[SJA1105_SIZE_QLEVEL_AREA] = {0}; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->mac_hl2[port], - packed_buf, SJA1105_SIZE_HL2_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl2[port], packed_buf, + SJA1105_SIZE_HL2_AREA); if (rc < 0) return rc; @@ -215,8 +215,8 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv, priv->info->device_id == SJA1105T_DEVICE_ID) return 0; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->qlevel[port], - packed_buf, SJA1105_SIZE_QLEVEL_AREA); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->qlevel[port], packed_buf, + SJA1105_SIZE_QLEVEL_AREA); if (rc < 0) return rc; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 7687ddcae159..2ffe642cf54b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -382,8 +382,8 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv) static int sja1105_init_general_params(struct sja1105_private *priv) { struct sja1105_general_params_entry default_general_params = { - /* Disallow dynamic changing of the mirror port */ - .mirr_ptacu = 0, + /* Allow dynamic changing of the mirror port */ + .mirr_ptacu = true, .switchid = priv->ds->index, /* Priority queue for link-local management frames * (both ingress to and egress from CPU - PTP, STP etc) @@ -403,8 +403,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv) * by installing a temporary 'management route' */ .host_port = dsa_upstream_port(priv->ds, 0), - /* Same as host port */ - .mirr_port = dsa_upstream_port(priv->ds, 0), + /* Default to an invalid value */ + .mirr_port = SJA1105_NUM_PORTS, /* Link-local traffic received on casc_port will be forwarded * to host_port without embedding the source port and device ID * info in the destination MAC address (presumably because it @@ -458,9 +458,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv) #define SJA1105_RATE_MBPS(speed) (((speed) * 64000) / 1000) -static inline void -sja1105_setup_policer(struct sja1105_l2_policing_entry *policing, - int index) +static void sja1105_setup_policer(struct sja1105_l2_policing_entry *policing, + int index) { policing[index].sharindx = index; policing[index].smax = 65535; /* Burst size in bytes */ @@ -507,39 +506,6 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) return 0; } -static int sja1105_init_avb_params(struct sja1105_private *priv, - bool on) -{ - struct sja1105_avb_params_entry *avb; - struct sja1105_table *table; - - table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS]; - - /* Discard previous AVB Parameters Table */ - if (table->entry_count) { - kfree(table->entries); - table->entry_count = 0; - } - - /* Configure the reception of meta frames only if requested */ - if (!on) - return 0; - - table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT; - - avb = table->entries; - - avb->destmeta = SJA1105_META_DMAC; - avb->srcmeta = SJA1105_META_SMAC; - - return 0; -} - static int sja1105_static_config_load(struct sja1105_private *priv, struct sja1105_dt_port *ports) { @@ -580,9 +546,6 @@ static int sja1105_static_config_load(struct sja1105_private *priv, rc = sja1105_init_general_params(priv); if (rc < 0) return rc; - rc = sja1105_init_avb_params(priv, false); - if (rc < 0) - return rc; /* Send initial configuration to hardware via SPI */ return sja1105_static_config_upload(priv); @@ -951,7 +914,7 @@ sja1105_static_fdb_change(struct sja1105_private *priv, int port, * For the placement of a newly learnt FDB entry, the switch selects the bin * based on a hash function, and the way within that bin incrementally. */ -static inline int sja1105et_fdb_index(int bin, int way) +static int sja1105et_fdb_index(int bin, int way) { return bin * SJA1105ET_FDB_BIN_SIZE + way; } @@ -1687,7 +1650,7 @@ static int sja1105_setup(struct dsa_switch *ds) return rc; } - rc = sja1105_ptp_clock_register(priv); + rc = sja1105_ptp_clock_register(ds); if (rc < 0) { dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc); return rc; @@ -1729,9 +1692,7 @@ static void sja1105_teardown(struct dsa_switch *ds) struct sja1105_private *priv = ds->priv; sja1105_tas_teardown(ds); - cancel_work_sync(&priv->tagger_data.rxtstamp_work); - skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue); - sja1105_ptp_clock_unregister(priv); + sja1105_ptp_clock_unregister(ds); sja1105_static_config_free(&priv->static_config); } @@ -1817,11 +1778,8 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; struct sja1105_port *sp = &priv->ports[port]; - struct skb_shared_hwtstamps shwt = {0}; int slot = sp->mgmt_slot; struct sk_buff *clone; - u64 now, ts; - int rc; /* The tragic fact about the switch having 4x2 slots for installing * management routes is that all of them except one are actually @@ -1847,27 +1805,8 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port, if (!clone) goto out; - skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; - - mutex_lock(&priv->ptp_lock); + sja1105_ptp_txtstamp_skb(ds, slot, clone); - now = priv->tstamp_cc.read(&priv->tstamp_cc); - - rc = sja1105_ptpegr_ts_poll(priv, slot, &ts); - if (rc < 0) { - dev_err(ds->dev, "xmit: timed out polling for tstamp\n"); - kfree_skb(clone); - goto out_unlock_ptp; - } - - ts = sja1105_tstamp_reconstruct(priv, now, ts); - ts = timecounter_cyc2time(&priv->tstamp_tc, ts); - - shwt.hwtstamp = ns_to_ktime(ts); - skb_complete_tx_timestamp(clone, &shwt); - -out_unlock_ptp: - mutex_unlock(&priv->ptp_lock); out: mutex_unlock(&priv->mgmt_lock); return NETDEV_TX_OK; @@ -1897,180 +1836,94 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } -/* Must be called only with priv->tagger_data.state bit - * SJA1105_HWTS_RX_EN cleared +static int sja1105_port_setup_tc(struct dsa_switch *ds, int port, + enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return sja1105_setup_tc_taprio(ds, port, type_data); + default: + return -EOPNOTSUPP; + } +} + +/* We have a single mirror (@to) port, but can configure ingress and egress + * mirroring on all other (@from) ports. + * We need to allow mirroring rules only as long as the @to port is always the + * same, and we need to unset the @to port from mirr_port only when there is no + * mirroring rule that references it. */ -static int sja1105_change_rxtstamping(struct sja1105_private *priv, - bool on) +static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to, + bool ingress, bool enabled) { struct sja1105_general_params_entry *general_params; + struct sja1105_mac_config_entry *mac; struct sja1105_table *table; + bool already_enabled; + u64 new_mirr_port; int rc; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; general_params = table->entries; - general_params->send_meta1 = on; - general_params->send_meta0 = on; - rc = sja1105_init_avb_params(priv, on); - if (rc < 0) - return rc; + mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; - /* Initialize the meta state machine to a known state */ - if (priv->tagger_data.stampable_skb) { - kfree_skb(priv->tagger_data.stampable_skb); - priv->tagger_data.stampable_skb = NULL; + already_enabled = (general_params->mirr_port != SJA1105_NUM_PORTS); + if (already_enabled && enabled && general_params->mirr_port != to) { + dev_err(priv->ds->dev, + "Delete mirroring rules towards port %llu first\n", + general_params->mirr_port); + return -EBUSY; } - return sja1105_static_config_reload(priv); -} + new_mirr_port = to; + if (!enabled) { + bool keep = false; + int port; -static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr) -{ - struct sja1105_private *priv = ds->priv; - struct hwtstamp_config config; - bool rx_on; - int rc; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - priv->ports[port].hwts_tx_en = false; - break; - case HWTSTAMP_TX_ON: - priv->ports[port].hwts_tx_en = true; - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - rx_on = false; - break; - default: - rx_on = true; - break; + /* Anybody still referencing mirr_port? */ + for (port = 0; port < SJA1105_NUM_PORTS; port++) { + if (mac[port].ing_mirr || mac[port].egr_mirr) { + keep = true; + break; + } + } + /* Unset already_enabled for next time */ + if (!keep) + new_mirr_port = SJA1105_NUM_PORTS; } + if (new_mirr_port != general_params->mirr_port) { + general_params->mirr_port = new_mirr_port; - if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) { - clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); - - rc = sja1105_change_rxtstamping(priv, rx_on); - if (rc < 0) { - dev_err(ds->dev, - "Failed to change RX timestamping: %d\n", rc); + rc = sja1105_dynamic_config_write(priv, BLK_IDX_GENERAL_PARAMS, + 0, general_params, true); + if (rc < 0) return rc; - } - if (rx_on) - set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); } - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; - return 0; -} - -static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr) -{ - struct sja1105_private *priv = ds->priv; - struct hwtstamp_config config; - - config.flags = 0; - if (priv->ports[port].hwts_tx_en) - config.tx_type = HWTSTAMP_TX_ON; - else - config.tx_type = HWTSTAMP_TX_OFF; - if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + if (ingress) + mac[from].ing_mirr = enabled; else - config.rx_filter = HWTSTAMP_FILTER_NONE; - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -#define to_tagger(d) \ - container_of((d), struct sja1105_tagger_data, rxtstamp_work) -#define to_sja1105(d) \ - container_of((d), struct sja1105_private, tagger_data) - -static void sja1105_rxtstamp_work(struct work_struct *work) -{ - struct sja1105_tagger_data *data = to_tagger(work); - struct sja1105_private *priv = to_sja1105(data); - struct sk_buff *skb; - u64 now; - - mutex_lock(&priv->ptp_lock); + mac[from].egr_mirr = enabled; - while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) { - struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); - u64 ts; - - now = priv->tstamp_cc.read(&priv->tstamp_cc); - - *shwt = (struct skb_shared_hwtstamps) {0}; - - ts = SJA1105_SKB_CB(skb)->meta_tstamp; - ts = sja1105_tstamp_reconstruct(priv, now, ts); - ts = timecounter_cyc2time(&priv->tstamp_tc, ts); - - shwt->hwtstamp = ns_to_ktime(ts); - netif_rx_ni(skb); - } - - mutex_unlock(&priv->ptp_lock); + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, from, + &mac[from], true); } -/* Called from dsa_skb_defer_rx_timestamp */ -static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, - struct sk_buff *skb, unsigned int type) +static int sja1105_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress) { - struct sja1105_private *priv = ds->priv; - struct sja1105_tagger_data *data = &priv->tagger_data; - - if (!test_bit(SJA1105_HWTS_RX_EN, &data->state)) - return false; - - /* We need to read the full PTP clock to reconstruct the Rx - * timestamp. For that we need a sleepable context. - */ - skb_queue_tail(&data->skb_rxtstamp_queue, skb); - schedule_work(&data->rxtstamp_work); - return true; + return sja1105_mirror_apply(ds->priv, port, mirror->to_local_port, + ingress, true); } -/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone - * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit - * callback, where we will timestamp it synchronously. - */ -static bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, - struct sk_buff *skb, unsigned int type) +static void sja1105_mirror_del(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror) { - struct sja1105_private *priv = ds->priv; - struct sja1105_port *sp = &priv->ports[port]; - - if (!sp->hwts_tx_en) - return false; - - return true; -} - -static int sja1105_port_setup_tc(struct dsa_switch *ds, int port, - enum tc_setup_type type, - void *type_data) -{ - switch (type) { - case TC_SETUP_QDISC_TAPRIO: - return sja1105_setup_tc_taprio(ds, port, type_data); - default: - return -EOPNOTSUPP; - } + sja1105_mirror_apply(ds->priv, port, mirror->to_local_port, + mirror->ingress, false); } static const struct dsa_switch_ops sja1105_switch_ops = { @@ -2106,6 +1959,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_rxtstamp = sja1105_port_rxtstamp, .port_txtstamp = sja1105_port_txtstamp, .port_setup_tc = sja1105_port_setup_tc, + .port_mirror_add = sja1105_mirror_add, + .port_mirror_del = sja1105_mirror_del, }; static int sja1105_check_device_id(struct sja1105_private *priv) @@ -2113,23 +1968,22 @@ static int sja1105_check_device_id(struct sja1105_private *priv) const struct sja1105_regs *regs = priv->info->regs; u8 prod_id[SJA1105_SIZE_DEVICE_ID] = {0}; struct device *dev = &priv->spidev->dev; - u64 device_id; + u32 device_id; u64 part_no; int rc; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->device_id, - &device_id, SJA1105_SIZE_DEVICE_ID); + rc = sja1105_xfer_u32(priv, SPI_READ, regs->device_id, &device_id); if (rc < 0) return rc; if (device_id != priv->info->device_id) { - dev_err(dev, "Expected device ID 0x%llx but read 0x%llx\n", + dev_err(dev, "Expected device ID 0x%llx but read 0x%x\n", priv->info->device_id, device_id); return -ENODEV; } - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, regs->prod_id, - prod_id, SJA1105_SIZE_DEVICE_ID); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->prod_id, prod_id, + SJA1105_SIZE_DEVICE_ID); if (rc < 0) return rc; @@ -2202,9 +2056,6 @@ static int sja1105_probe(struct spi_device *spi) priv->ds = ds; tagger_data = &priv->tagger_data; - skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); - INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work); - spin_lock_init(&tagger_data->meta_lock); /* Connections between dsa_port and sja1105_port */ for (i = 0; i < SJA1105_NUM_PORTS; i++) { @@ -2214,6 +2065,7 @@ static int sja1105_probe(struct spi_device *spi) sp->dp = &ds->ports[i]; sp->data = tagger_data; } + mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); sja1105_tas_setup(ds); diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index d8e8dd59f3d1..783100397f8a 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -13,24 +13,6 @@ #define SJA1105_MAX_ADJ_PPB 32000000 #define SJA1105_SIZE_PTP_CMD 4 -/* Timestamps are in units of 8 ns clock ticks (equivalent to a fixed - * 125 MHz clock) so the scale factor (MULT / SHIFT) needs to be 8. - * Furthermore, wisely pick SHIFT as 28 bits, which translates - * MULT into 2^31 (0x80000000). This is the same value around which - * the hardware PTPCLKRATE is centered, so the same ppb conversion - * arithmetic can be reused. - */ -#define SJA1105_CC_SHIFT 28 -#define SJA1105_CC_MULT (8 << SJA1105_CC_SHIFT) - -/* Having 33 bits of cycle counter left until a 64-bit overflow during delta - * conversion, we multiply this by the 8 ns counter resolution and arrive at - * a comfortable 68.71 second refresh interval until the delta would cause - * an integer overflow, in absence of any other readout. - * Approximate to 1 minute. - */ -#define SJA1105_REFRESH_INTERVAL (HZ * 60) - /* This range is actually +/- SJA1105_MAX_ADJ_PPB * divided by 1000 (ppb -> ppm) and with a 16-bit * "fractional" part (actually fixed point). @@ -41,7 +23,7 @@ * * This forgoes a "ppb" numeric representation (up to NSEC_PER_SEC) * and defines the scaling factor between scaled_ppm and the actual - * frequency adjustments (both cycle counter and hardware). + * frequency adjustments of the PHC. * * ptpclkrate = scaled_ppm * 2^31 / (10^6 * 2^16) * simplifies to @@ -49,22 +31,154 @@ */ #define SJA1105_CC_MULT_NUM (1 << 9) #define SJA1105_CC_MULT_DEM 15625 +#define SJA1105_CC_MULT 0x80000000 -#define ptp_to_sja1105(d) container_of((d), struct sja1105_private, ptp_caps) -#define cc_to_sja1105(d) container_of((d), struct sja1105_private, tstamp_cc) -#define dw_to_sja1105(d) container_of((d), struct sja1105_private, refresh_work) - -struct sja1105_ptp_cmd { - u64 resptp; /* reset */ +enum sja1105_ptp_clk_mode { + PTP_ADD_MODE = 1, + PTP_SET_MODE = 0, }; +#define ptp_caps_to_data(d) \ + container_of((d), struct sja1105_ptp_data, caps) +#define ptp_data_to_sja1105(d) \ + container_of((d), struct sja1105_private, ptp_data) + +static int sja1105_init_avb_params(struct sja1105_private *priv, + bool on) +{ + struct sja1105_avb_params_entry *avb; + struct sja1105_table *table; + + table = &priv->static_config.tables[BLK_IDX_AVB_PARAMS]; + + /* Discard previous AVB Parameters Table */ + if (table->entry_count) { + kfree(table->entries); + table->entry_count = 0; + } + + /* Configure the reception of meta frames only if requested */ + if (!on) + return 0; + + table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT, + table->ops->unpacked_entry_size, GFP_KERNEL); + if (!table->entries) + return -ENOMEM; + + table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT; + + avb = table->entries; + + avb->destmeta = SJA1105_META_DMAC; + avb->srcmeta = SJA1105_META_SMAC; + + return 0; +} + +/* Must be called only with priv->tagger_data.state bit + * SJA1105_HWTS_RX_EN cleared + */ +static int sja1105_change_rxtstamping(struct sja1105_private *priv, + bool on) +{ + struct sja1105_general_params_entry *general_params; + struct sja1105_table *table; + int rc; + + table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; + general_params = table->entries; + general_params->send_meta1 = on; + general_params->send_meta0 = on; + + rc = sja1105_init_avb_params(priv, on); + if (rc < 0) + return rc; + + /* Initialize the meta state machine to a known state */ + if (priv->tagger_data.stampable_skb) { + kfree_skb(priv->tagger_data.stampable_skb); + priv->tagger_data.stampable_skb = NULL; + } + + return sja1105_static_config_reload(priv); +} + +int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) +{ + struct sja1105_private *priv = ds->priv; + struct hwtstamp_config config; + bool rx_on; + int rc; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + priv->ports[port].hwts_tx_en = false; + break; + case HWTSTAMP_TX_ON: + priv->ports[port].hwts_tx_en = true; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + rx_on = false; + break; + default: + rx_on = true; + break; + } + + if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) { + clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); + + rc = sja1105_change_rxtstamping(priv, rx_on); + if (rc < 0) { + dev_err(ds->dev, + "Failed to change RX timestamping: %d\n", rc); + return rc; + } + if (rx_on) + set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); + } + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + return 0; +} + +int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) +{ + struct sja1105_private *priv = ds->priv; + struct hwtstamp_config config; + + config.flags = 0; + if (priv->ports[port].hwts_tx_en) + config.tx_type = HWTSTAMP_TX_ON; + else + config.tx_type = HWTSTAMP_TX_OFF; + if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else + config.rx_filter = HWTSTAMP_FILTER_NONE; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + int sja1105_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *info) { struct sja1105_private *priv = ds->priv; + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; /* Called during cleanup */ - if (!priv->clock) + if (!ptp_data->clock) return -ENODEV; info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | @@ -74,14 +188,14 @@ int sja1105_get_ts_info(struct dsa_switch *ds, int port, (1 << HWTSTAMP_TX_ON); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT); - info->phc_index = ptp_clock_index(priv->clock); + info->phc_index = ptp_clock_index(ptp_data->clock); return 0; } -int sja1105et_ptp_cmd(const void *ctx, const void *data) +int sja1105et_ptp_cmd(const struct dsa_switch *ds, + const struct sja1105_ptp_cmd *cmd) { - const struct sja1105_ptp_cmd *cmd = data; - const struct sja1105_private *priv = ctx; + const struct sja1105_private *priv = ds->priv; const struct sja1105_regs *regs = priv->info->regs; const int size = SJA1105_SIZE_PTP_CMD; u8 buf[SJA1105_SIZE_PTP_CMD] = {0}; @@ -90,15 +204,17 @@ int sja1105et_ptp_cmd(const void *ctx, const void *data) sja1105_pack(buf, &valid, 31, 31, size); sja1105_pack(buf, &cmd->resptp, 2, 2, size); + sja1105_pack(buf, &cmd->corrclk4ts, 1, 1, size); + sja1105_pack(buf, &cmd->ptpclkadd, 0, 0, size); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control, - buf, SJA1105_SIZE_PTP_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf, + SJA1105_SIZE_PTP_CMD); } -int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) +int sja1105pqrs_ptp_cmd(const struct dsa_switch *ds, + const struct sja1105_ptp_cmd *cmd) { - const struct sja1105_ptp_cmd *cmd = data; - const struct sja1105_private *priv = ctx; + const struct sja1105_private *priv = ds->priv; const struct sja1105_regs *regs = priv->info->regs; const int size = SJA1105_SIZE_PTP_CMD; u8 buf[SJA1105_SIZE_PTP_CMD] = {0}; @@ -107,9 +223,11 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) sja1105_pack(buf, &valid, 31, 31, size); sja1105_pack(buf, &cmd->resptp, 3, 3, size); + sja1105_pack(buf, &cmd->corrclk4ts, 2, 2, size); + sja1105_pack(buf, &cmd->ptpclkadd, 0, 0, size); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control, - buf, SJA1105_SIZE_PTP_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf, + SJA1105_SIZE_PTP_CMD); } /* The switch returns partial timestamps (24 bits for SJA1105 E/T, which wrap @@ -126,9 +244,10 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data) * Must be called within one wraparound period of the partial timestamp since * it was generated by the MAC. */ -u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now, - u64 ts_partial) +static u64 sja1105_tstamp_reconstruct(struct dsa_switch *ds, u64 now, + u64 ts_partial) { + struct sja1105_private *priv = ds->priv; u64 partial_tstamp_mask = CYCLECOUNTER_MASK(priv->info->ptp_ts_bits); u64 ts_reconstructed; @@ -170,8 +289,9 @@ u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now, * To have common code for E/T and P/Q/R/S for reading the timestamp, * we need to juggle with the offset and the bit indices. */ -int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) +static int sja1105_ptpegr_ts_poll(struct dsa_switch *ds, int port, u64 *ts) { + struct sja1105_private *priv = ds->priv; const struct sja1105_regs *regs = priv->info->regs; int tstamp_bit_start, tstamp_bit_end; int timeout = 10; @@ -180,10 +300,8 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) int rc; do { - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, - regs->ptpegr_ts[port], - packed_buf, - priv->info->ptpegr_ts_bytes); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->ptpegr_ts[port], + packed_buf, priv->info->ptpegr_ts_bytes); if (rc < 0) return rc; @@ -216,22 +334,109 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) return 0; } -int sja1105_ptp_reset(struct sja1105_private *priv) +/* Caller must hold ptp_data->lock */ +static int sja1105_ptpclkval_read(struct sja1105_private *priv, u64 *ticks) +{ + const struct sja1105_regs *regs = priv->info->regs; + + return sja1105_xfer_u64(priv, SPI_READ, regs->ptpclkval, ticks); +} + +/* Caller must hold ptp_data->lock */ +static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 ticks) { + const struct sja1105_regs *regs = priv->info->regs; + + return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpclkval, &ticks); +} + +#define rxtstamp_to_tagger(d) \ + container_of((d), struct sja1105_tagger_data, rxtstamp_work) +#define tagger_to_sja1105(d) \ + container_of((d), struct sja1105_private, tagger_data) + +static void sja1105_rxtstamp_work(struct work_struct *work) +{ + struct sja1105_tagger_data *tagger_data = rxtstamp_to_tagger(work); + struct sja1105_private *priv = tagger_to_sja1105(tagger_data); + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct dsa_switch *ds = priv->ds; - struct sja1105_ptp_cmd cmd = {0}; + struct sk_buff *skb; + + mutex_lock(&ptp_data->lock); + + while ((skb = skb_dequeue(&tagger_data->skb_rxtstamp_queue)) != NULL) { + struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); + u64 ticks, ts; + int rc; + + rc = sja1105_ptpclkval_read(priv, &ticks); + if (rc < 0) { + dev_err(ds->dev, "Failed to read PTP clock: %d\n", rc); + kfree_skb(skb); + continue; + } + + *shwt = (struct skb_shared_hwtstamps) {0}; + + ts = SJA1105_SKB_CB(skb)->meta_tstamp; + ts = sja1105_tstamp_reconstruct(ds, ticks, ts); + + shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts)); + netif_rx_ni(skb); + } + + mutex_unlock(&ptp_data->lock); +} + +/* Called from dsa_skb_defer_rx_timestamp */ +bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; + + if (!test_bit(SJA1105_HWTS_RX_EN, &tagger_data->state)) + return false; + + /* We need to read the full PTP clock to reconstruct the Rx + * timestamp. For that we need a sleepable context. + */ + skb_queue_tail(&tagger_data->skb_rxtstamp_queue, skb); + schedule_work(&tagger_data->rxtstamp_work); + return true; +} + +/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone + * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit + * callback, where we will timestamp it synchronously. + */ +bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_port *sp = &priv->ports[port]; + + if (!sp->hwts_tx_en) + return false; + + return true; +} + +int sja1105_ptp_reset(struct dsa_switch *ds) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; + struct sja1105_ptp_cmd cmd = ptp_data->cmd; int rc; - mutex_lock(&priv->ptp_lock); + mutex_lock(&ptp_data->lock); cmd.resptp = 1; dev_dbg(ds->dev, "Resetting PTP clock\n"); - rc = priv->info->ptp_cmd(priv, &cmd); - - timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc, - ktime_to_ns(ktime_get_real())); + rc = priv->info->ptp_cmd(ds, &cmd); - mutex_unlock(&priv->ptp_lock); + mutex_unlock(&ptp_data->lock); return rc; } @@ -239,154 +444,185 @@ int sja1105_ptp_reset(struct sja1105_private *priv) static int sja1105_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct sja1105_private *priv = ptp_to_sja1105(ptp); - u64 ns; + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + u64 ticks = 0; + int rc; - mutex_lock(&priv->ptp_lock); - ns = timecounter_read(&priv->tstamp_tc); - mutex_unlock(&priv->ptp_lock); + mutex_lock(&ptp_data->lock); - *ts = ns_to_timespec64(ns); + rc = sja1105_ptpclkval_read(priv, &ticks); + *ts = ns_to_timespec64(sja1105_ticks_to_ns(ticks)); - return 0; + mutex_unlock(&ptp_data->lock); + + return rc; +} + +/* Caller must hold ptp_data->lock */ +static int sja1105_ptp_mode_set(struct sja1105_private *priv, + enum sja1105_ptp_clk_mode mode) +{ + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; + + if (ptp_data->cmd.ptpclkadd == mode) + return 0; + + ptp_data->cmd.ptpclkadd = mode; + + return priv->info->ptp_cmd(priv->ds, &ptp_data->cmd); } +/* Write to PTPCLKVAL while PTPCLKADD is 0 */ static int sja1105_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { - struct sja1105_private *priv = ptp_to_sja1105(ptp); - u64 ns = timespec64_to_ns(ts); + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + u64 ticks = ns_to_sja1105_ticks(timespec64_to_ns(ts)); + int rc; - mutex_lock(&priv->ptp_lock); - timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc, ns); - mutex_unlock(&priv->ptp_lock); + mutex_lock(&ptp_data->lock); - return 0; + rc = sja1105_ptp_mode_set(priv, PTP_SET_MODE); + if (rc < 0) { + dev_err(priv->ds->dev, "Failed to put PTPCLK in set mode\n"); + goto out; + } + + rc = sja1105_ptpclkval_write(priv, ticks); +out: + mutex_unlock(&ptp_data->lock); + + return rc; } static int sja1105_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { - struct sja1105_private *priv = ptp_to_sja1105(ptp); + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + const struct sja1105_regs *regs = priv->info->regs; + u32 clkrate32; s64 clkrate; + int rc; clkrate = (s64)scaled_ppm * SJA1105_CC_MULT_NUM; clkrate = div_s64(clkrate, SJA1105_CC_MULT_DEM); - mutex_lock(&priv->ptp_lock); - - /* Force a readout to update the timer *before* changing its frequency. - * - * This way, its corrected time curve can at all times be modeled - * as a linear "A * x + B" function, where: - * - * - B are past frequency adjustments and offset shifts, all - * accumulated into the cycle_last variable. - * - * - A is the new frequency adjustments we're just about to set. - * - * Reading now makes B accumulate the correct amount of time, - * corrected at the old rate, before changing it. - * - * Hardware timestamps then become simple points on the curve and - * are approximated using the above function. This is still better - * than letting the switch take the timestamps using the hardware - * rate-corrected clock (PTPCLKVAL) - the comparison in this case would - * be that we're shifting the ruler at the same time as we're taking - * measurements with it. - * - * The disadvantage is that it's possible to receive timestamps when - * a frequency adjustment took place in the near past. - * In this case they will be approximated using the new ppb value - * instead of a compound function made of two segments (one at the old - * and the other at the new rate) - introducing some inaccuracy. - */ - timecounter_read(&priv->tstamp_tc); + /* Take a +/- value and re-center it around 2^31. */ + clkrate = SJA1105_CC_MULT + clkrate; + WARN_ON(abs(clkrate) >= GENMASK_ULL(31, 0)); + clkrate32 = clkrate; - priv->tstamp_cc.mult = SJA1105_CC_MULT + clkrate; + mutex_lock(&ptp_data->lock); - mutex_unlock(&priv->ptp_lock); + rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkrate, &clkrate32); - return 0; + mutex_unlock(&ptp_data->lock); + + return rc; } +/* Write to PTPCLKVAL while PTPCLKADD is 1 */ static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { - struct sja1105_private *priv = ptp_to_sja1105(ptp); + struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp); + struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data); + s64 ticks = ns_to_sja1105_ticks(delta); + int rc; - mutex_lock(&priv->ptp_lock); - timecounter_adjtime(&priv->tstamp_tc, delta); - mutex_unlock(&priv->ptp_lock); + mutex_lock(&ptp_data->lock); - return 0; + rc = sja1105_ptp_mode_set(priv, PTP_ADD_MODE); + if (rc < 0) { + dev_err(priv->ds->dev, "Failed to put PTPCLK in add mode\n"); + goto out; + } + + rc = sja1105_ptpclkval_write(priv, ticks); + +out: + mutex_unlock(&ptp_data->lock); + + return rc; } -static u64 sja1105_ptptsclk_read(const struct cyclecounter *cc) +int sja1105_ptp_clock_register(struct dsa_switch *ds) { - struct sja1105_private *priv = cc_to_sja1105(cc); - const struct sja1105_regs *regs = priv->info->regs; - u64 ptptsclk = 0; - int rc; + struct sja1105_private *priv = ds->priv; + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; + + ptp_data->caps = (struct ptp_clock_info) { + .owner = THIS_MODULE, + .name = "SJA1105 PHC", + .adjfine = sja1105_ptp_adjfine, + .adjtime = sja1105_ptp_adjtime, + .gettime64 = sja1105_ptp_gettime, + .settime64 = sja1105_ptp_settime, + .max_adj = SJA1105_MAX_ADJ_PPB, + }; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptptsclk, - &ptptsclk, 8); - if (rc < 0) - dev_err_ratelimited(priv->ds->dev, - "failed to read ptp cycle counter: %d\n", - rc); - return ptptsclk; + skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); + INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work); + spin_lock_init(&tagger_data->meta_lock); + + ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev); + if (IS_ERR_OR_NULL(ptp_data->clock)) + return PTR_ERR(ptp_data->clock); + + ptp_data->cmd.corrclk4ts = true; + ptp_data->cmd.ptpclkadd = PTP_SET_MODE; + + return sja1105_ptp_reset(ds); } -static void sja1105_ptp_overflow_check(struct work_struct *work) +void sja1105_ptp_clock_unregister(struct dsa_switch *ds) { - struct delayed_work *dw = to_delayed_work(work); - struct sja1105_private *priv = dw_to_sja1105(dw); - struct timespec64 ts; + struct sja1105_private *priv = ds->priv; + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; - sja1105_ptp_gettime(&priv->ptp_caps, &ts); + if (IS_ERR_OR_NULL(ptp_data->clock)) + return; - schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL); + cancel_work_sync(&priv->tagger_data.rxtstamp_work); + skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue); + ptp_clock_unregister(ptp_data->clock); + ptp_data->clock = NULL; } -static const struct ptp_clock_info sja1105_ptp_caps = { - .owner = THIS_MODULE, - .name = "SJA1105 PHC", - .adjfine = sja1105_ptp_adjfine, - .adjtime = sja1105_ptp_adjtime, - .gettime64 = sja1105_ptp_gettime, - .settime64 = sja1105_ptp_settime, - .max_adj = SJA1105_MAX_ADJ_PPB, -}; - -int sja1105_ptp_clock_register(struct sja1105_private *priv) +void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, + struct sk_buff *skb) { - struct dsa_switch *ds = priv->ds; + struct sja1105_private *priv = ds->priv; + struct sja1105_ptp_data *ptp_data = &priv->ptp_data; + struct skb_shared_hwtstamps shwt = {0}; + u64 ticks, ts; + int rc; - /* Set up the cycle counter */ - priv->tstamp_cc = (struct cyclecounter) { - .read = sja1105_ptptsclk_read, - .mask = CYCLECOUNTER_MASK(64), - .shift = SJA1105_CC_SHIFT, - .mult = SJA1105_CC_MULT, - }; - mutex_init(&priv->ptp_lock); - priv->ptp_caps = sja1105_ptp_caps; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - priv->clock = ptp_clock_register(&priv->ptp_caps, ds->dev); - if (IS_ERR_OR_NULL(priv->clock)) - return PTR_ERR(priv->clock); + mutex_lock(&ptp_data->lock); - INIT_DELAYED_WORK(&priv->refresh_work, sja1105_ptp_overflow_check); - schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL); + rc = sja1105_ptpclkval_read(priv, &ticks); + if (rc < 0) { + dev_err(ds->dev, "Failed to read PTP clock: %d\n", rc); + kfree_skb(skb); + goto out; + } - return sja1105_ptp_reset(priv); -} + rc = sja1105_ptpegr_ts_poll(ds, slot, &ts); + if (rc < 0) { + dev_err(ds->dev, "timed out polling for tstamp\n"); + kfree_skb(skb); + goto out; + } -void sja1105_ptp_clock_unregister(struct sja1105_private *priv) -{ - if (IS_ERR_OR_NULL(priv->clock)) - return; + ts = sja1105_tstamp_reconstruct(ds, ticks, ts); + + shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts)); + skb_complete_tx_timestamp(skb, &shwt); - cancel_delayed_work_sync(&priv->refresh_work); - ptp_clock_unregister(priv->clock); - priv->clock = NULL; +out: + mutex_unlock(&ptp_data->lock); } diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 394e12a6ad59..243f130374d2 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -6,49 +6,88 @@ #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) -int sja1105_ptp_clock_register(struct sja1105_private *priv); +/* Timestamps are in units of 8 ns clock ticks (equivalent to + * a fixed 125 MHz clock). + */ +#define SJA1105_TICK_NS 8 + +static inline s64 ns_to_sja1105_ticks(s64 ns) +{ + return ns / SJA1105_TICK_NS; +} + +static inline s64 sja1105_ticks_to_ns(s64 ticks) +{ + return ticks * SJA1105_TICK_NS; +} -void sja1105_ptp_clock_unregister(struct sja1105_private *priv); +struct sja1105_ptp_cmd { + u64 resptp; /* reset */ + u64 corrclk4ts; /* use the corrected clock for timestamps */ + u64 ptpclkadd; /* enum sja1105_ptp_clk_mode */ +}; -int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts); +struct sja1105_ptp_data { + struct ptp_clock_info caps; + struct ptp_clock *clock; + struct sja1105_ptp_cmd cmd; + /* Serializes all operations on the PTP hardware clock */ + struct mutex lock; +}; -int sja1105et_ptp_cmd(const void *ctx, const void *data); +int sja1105_ptp_clock_register(struct dsa_switch *ds); -int sja1105pqrs_ptp_cmd(const void *ctx, const void *data); +void sja1105_ptp_clock_unregister(struct dsa_switch *ds); + +int sja1105et_ptp_cmd(const struct dsa_switch *ds, + const struct sja1105_ptp_cmd *cmd); + +int sja1105pqrs_ptp_cmd(const struct dsa_switch *ds, + const struct sja1105_ptp_cmd *cmd); int sja1105_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts); -u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now, - u64 ts_partial); +void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, + struct sk_buff *clone); + +int sja1105_ptp_reset(struct dsa_switch *ds); + +bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type); + +bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type); + +int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr); -int sja1105_ptp_reset(struct sja1105_private *priv); +int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr); #else -static inline int sja1105_ptp_clock_register(struct sja1105_private *priv) -{ - return 0; -} +struct sja1105_ptp_cmd; -static inline void sja1105_ptp_clock_unregister(struct sja1105_private *priv) -{ - return; -} +/* Structures cannot be empty in C. Bah! + * Keep the mutex as the only element, which is a bit more difficult to + * refactor out of sja1105_main.c anyway. + */ +struct sja1105_ptp_data { + struct mutex lock; +}; -static inline int -sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts) +static inline int sja1105_ptp_clock_register(struct dsa_switch *ds) { return 0; } -static inline u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, - u64 now, u64 ts_partial) +static inline void sja1105_ptp_clock_unregister(struct dsa_switch *ds) { } + +static inline void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, + struct sk_buff *clone) { - return 0; } -static inline int sja1105_ptp_reset(struct sja1105_private *priv) +static inline int sja1105_ptp_reset(struct dsa_switch *ds) { return 0; } @@ -59,6 +98,14 @@ static inline int sja1105_ptp_reset(struct sja1105_private *priv) #define sja1105_get_ts_info NULL +#define sja1105_port_rxtstamp NULL + +#define sja1105_port_txtstamp NULL + +#define sja1105_hwtstamp_get NULL + +#define sja1105_hwtstamp_set NULL + #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */ #endif /* _SJA1105_PTP_H */ diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 58dd37ecde17..ed02410a9366 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -7,42 +7,15 @@ #include <linux/packing.h> #include "sja1105.h" -#define SJA1105_SIZE_PORT_CTRL 4 #define SJA1105_SIZE_RESET_CMD 4 #define SJA1105_SIZE_SPI_MSG_HEADER 4 #define SJA1105_SIZE_SPI_MSG_MAXLEN (64 * 4) -#define SJA1105_SIZE_SPI_TRANSFER_MAX \ - (SJA1105_SIZE_SPI_MSG_HEADER + SJA1105_SIZE_SPI_MSG_MAXLEN) -static int sja1105_spi_transfer(const struct sja1105_private *priv, - const void *tx, void *rx, int size) -{ - struct spi_device *spi = priv->spidev; - struct spi_transfer transfer = { - .tx_buf = tx, - .rx_buf = rx, - .len = size, - }; - struct spi_message msg; - int rc; - - if (size > SJA1105_SIZE_SPI_TRANSFER_MAX) { - dev_err(&spi->dev, "SPI message (%d) longer than max of %d\n", - size, SJA1105_SIZE_SPI_TRANSFER_MAX); - return -EMSGSIZE; - } - - spi_message_init(&msg); - spi_message_add_tail(&transfer, &msg); - - rc = spi_sync(spi, &msg); - if (rc < 0) { - dev_err(&spi->dev, "SPI transfer failed: %d\n", rc); - return rc; - } - - return rc; -} +struct sja1105_chunk { + u8 *buf; + size_t len; + u64 reg_addr; +}; static void sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg) @@ -56,121 +29,150 @@ sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg) sja1105_pack(buf, &msg->address, 24, 4, size); } +#define sja1105_hdr_xfer(xfers, chunk) \ + ((xfers) + 2 * (chunk)) +#define sja1105_chunk_xfer(xfers, chunk) \ + ((xfers) + 2 * (chunk) + 1) +#define sja1105_hdr_buf(hdr_bufs, chunk) \ + ((hdr_bufs) + (chunk) * SJA1105_SIZE_SPI_MSG_HEADER) + /* If @rw is: * - SPI_WRITE: creates and sends an SPI write message at absolute - * address reg_addr, taking size_bytes from *packed_buf + * address reg_addr, taking @len bytes from *buf * - SPI_READ: creates and sends an SPI read message from absolute - * address reg_addr, writing size_bytes into *packed_buf - * - * This function should only be called if it is priorly known that - * @size_bytes is smaller than SIZE_SPI_MSG_MAXLEN. Larger packed buffers - * are chunked in smaller pieces by sja1105_spi_send_long_packed_buf below. + * address reg_addr, writing @len bytes into *buf */ -int sja1105_spi_send_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - void *packed_buf, size_t size_bytes) +int sja1105_xfer_buf(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, + u8 *buf, size_t len) { - u8 tx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0}; - u8 rx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0}; - const int msg_len = size_bytes + SJA1105_SIZE_SPI_MSG_HEADER; - struct sja1105_spi_message msg = {0}; - int rc; + struct sja1105_chunk chunk = { + .len = min_t(size_t, len, SJA1105_SIZE_SPI_MSG_MAXLEN), + .reg_addr = reg_addr, + .buf = buf, + }; + struct spi_device *spi = priv->spidev; + struct spi_transfer *xfers; + int num_chunks; + int rc, i = 0; + u8 *hdr_bufs; - if (msg_len > SJA1105_SIZE_SPI_TRANSFER_MAX) - return -ERANGE; + num_chunks = DIV_ROUND_UP(len, SJA1105_SIZE_SPI_MSG_MAXLEN); - msg.access = rw; - msg.address = reg_addr; - if (rw == SPI_READ) - msg.read_count = size_bytes / 4; + /* One transfer for each message header, one for each message + * payload (chunk). + */ + xfers = kcalloc(2 * num_chunks, sizeof(struct spi_transfer), + GFP_KERNEL); + if (!xfers) + return -ENOMEM; - sja1105_spi_message_pack(tx_buf, &msg); + /* Packed buffers for the num_chunks SPI message headers, + * stored as a contiguous array + */ + hdr_bufs = kcalloc(num_chunks, SJA1105_SIZE_SPI_MSG_HEADER, + GFP_KERNEL); + if (!hdr_bufs) { + kfree(xfers); + return -ENOMEM; + } - if (rw == SPI_WRITE) - memcpy(tx_buf + SJA1105_SIZE_SPI_MSG_HEADER, - packed_buf, size_bytes); + for (i = 0; i < num_chunks; i++) { + struct spi_transfer *chunk_xfer = sja1105_chunk_xfer(xfers, i); + struct spi_transfer *hdr_xfer = sja1105_hdr_xfer(xfers, i); + u8 *hdr_buf = sja1105_hdr_buf(hdr_bufs, i); + struct sja1105_spi_message msg; + + /* Populate the transfer's header buffer */ + msg.address = chunk.reg_addr; + msg.access = rw; + if (rw == SPI_READ) + msg.read_count = chunk.len / 4; + else + /* Ignored */ + msg.read_count = 0; + sja1105_spi_message_pack(hdr_buf, &msg); + hdr_xfer->tx_buf = hdr_buf; + hdr_xfer->len = SJA1105_SIZE_SPI_MSG_HEADER; + + /* Populate the transfer's data buffer */ + if (rw == SPI_READ) + chunk_xfer->rx_buf = chunk.buf; + else + chunk_xfer->tx_buf = chunk.buf; + chunk_xfer->len = chunk.len; + + /* Calculate next chunk */ + chunk.buf += chunk.len; + chunk.reg_addr += chunk.len / 4; + chunk.len = min_t(size_t, (ptrdiff_t)(buf + len - chunk.buf), + SJA1105_SIZE_SPI_MSG_MAXLEN); + + /* De-assert the chip select after each chunk. */ + if (chunk.len) + chunk_xfer->cs_change = 1; + } - rc = sja1105_spi_transfer(priv, tx_buf, rx_buf, msg_len); + rc = spi_sync_transfer(spi, xfers, 2 * num_chunks); if (rc < 0) - return rc; + dev_err(&spi->dev, "SPI transfer failed: %d\n", rc); - if (rw == SPI_READ) - memcpy(packed_buf, rx_buf + SJA1105_SIZE_SPI_MSG_HEADER, - size_bytes); + kfree(hdr_bufs); + kfree(xfers); - return 0; + return rc; } /* If @rw is: * - SPI_WRITE: creates and sends an SPI write message at absolute - * address reg_addr, taking size_bytes from *packed_buf + * address reg_addr * - SPI_READ: creates and sends an SPI read message from absolute - * address reg_addr, writing size_bytes into *packed_buf + * address reg_addr * * The u64 *value is unpacked, meaning that it's stored in the native * CPU endianness and directly usable by software running on the core. - * - * This is a wrapper around sja1105_spi_send_packed_buf(). */ -int sja1105_spi_send_int(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 reg_addr, - u64 *value, u64 size_bytes) +int sja1105_xfer_u64(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u64 *value) { - u8 packed_buf[SJA1105_SIZE_SPI_MSG_MAXLEN]; + u8 packed_buf[8]; int rc; - if (size_bytes > SJA1105_SIZE_SPI_MSG_MAXLEN) - return -ERANGE; - if (rw == SPI_WRITE) - sja1105_pack(packed_buf, value, 8 * size_bytes - 1, 0, - size_bytes); + sja1105_pack(packed_buf, value, 63, 0, 8); - rc = sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf, - size_bytes); + rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 8); if (rw == SPI_READ) - sja1105_unpack(packed_buf, value, 8 * size_bytes - 1, 0, - size_bytes); + sja1105_unpack(packed_buf, value, 63, 0, 8); return rc; } -/* Should be used if a @packed_buf larger than SJA1105_SIZE_SPI_MSG_MAXLEN - * must be sent/received. Splitting the buffer into chunks and assembling - * those into SPI messages is done automatically by this function. - */ -int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv, - sja1105_spi_rw_mode_t rw, u64 base_addr, - void *packed_buf, u64 buf_len) +/* Same as above, but transfers only a 4 byte word */ +int sja1105_xfer_u32(const struct sja1105_private *priv, + sja1105_spi_rw_mode_t rw, u64 reg_addr, u32 *value) { - struct chunk { - void *buf_ptr; - int len; - u64 spi_address; - } chunk; - int distance_to_end; + u8 packed_buf[4]; + u64 tmp; int rc; - /* Initialize chunk */ - chunk.buf_ptr = packed_buf; - chunk.spi_address = base_addr; - chunk.len = min_t(int, buf_len, SJA1105_SIZE_SPI_MSG_MAXLEN); + if (rw == SPI_WRITE) { + /* The packing API only supports u64 as CPU word size, + * so we need to convert. + */ + tmp = *value; + sja1105_pack(packed_buf, &tmp, 31, 0, 4); + } - while (chunk.len) { - rc = sja1105_spi_send_packed_buf(priv, rw, chunk.spi_address, - chunk.buf_ptr, chunk.len); - if (rc < 0) - return rc; + rc = sja1105_xfer_buf(priv, rw, reg_addr, packed_buf, 4); - chunk.buf_ptr += chunk.len; - chunk.spi_address += chunk.len / 4; - distance_to_end = (uintptr_t)(packed_buf + buf_len - - chunk.buf_ptr); - chunk.len = min(distance_to_end, SJA1105_SIZE_SPI_MSG_MAXLEN); + if (rw == SPI_READ) { + sja1105_unpack(packed_buf, &tmp, 31, 0, 4); + *value = tmp; } - return 0; + return rc; } /* Back-ported structure from UM11040 Table 112. @@ -241,8 +243,8 @@ static int sja1105et_reset_cmd(const void *ctx, const void *data) sja1105et_reset_cmd_pack(packed_buf, reset); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu, - packed_buf, SJA1105_SIZE_RESET_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf, + SJA1105_SIZE_RESET_CMD); } static int sja1105pqrs_reset_cmd(const void *ctx, const void *data) @@ -271,8 +273,8 @@ static int sja1105pqrs_reset_cmd(const void *ctx, const void *data) sja1105pqrs_reset_cmd_pack(packed_buf, reset); - return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->rgu, - packed_buf, SJA1105_SIZE_RESET_CMD); + return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf, + SJA1105_SIZE_RESET_CMD); } static int sja1105_cold_reset(const struct sja1105_private *priv) @@ -287,11 +289,11 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv, unsigned long port_bitmap, bool tx_inhibited) { const struct sja1105_regs *regs = priv->info->regs; - u64 inhibit_cmd; + u32 inhibit_cmd; int rc; - rc = sja1105_spi_send_int(priv, SPI_READ, regs->port_control, - &inhibit_cmd, SJA1105_SIZE_PORT_CTRL); + rc = sja1105_xfer_u32(priv, SPI_READ, regs->port_control, + &inhibit_cmd); if (rc < 0) return rc; @@ -300,8 +302,8 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv, else inhibit_cmd &= ~port_bitmap; - return sja1105_spi_send_int(priv, SPI_WRITE, regs->port_control, - &inhibit_cmd, SJA1105_SIZE_PORT_CTRL); + return sja1105_xfer_u32(priv, SPI_WRITE, regs->port_control, + &inhibit_cmd); } struct sja1105_status { @@ -339,9 +341,7 @@ static int sja1105_status_get(struct sja1105_private *priv, u8 packed_buf[4]; int rc; - rc = sja1105_spi_send_packed_buf(priv, SPI_READ, - regs->status, - packed_buf, 4); + rc = sja1105_xfer_buf(priv, SPI_READ, regs->status, packed_buf, 4); if (rc < 0) return rc; @@ -437,9 +437,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) /* Wait for the switch to come out of reset */ usleep_range(1000, 5000); /* Upload the static config to the device */ - rc = sja1105_spi_send_long_packed_buf(priv, SPI_WRITE, - regs->config, - config_buf, buf_len); + rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->config, + config_buf, buf_len); if (rc < 0) { dev_err(dev, "Failed to upload config, retrying...\n"); continue; @@ -482,7 +481,7 @@ int sja1105_static_config_upload(struct sja1105_private *priv) dev_info(dev, "Succeeded after %d tried\n", RETRIES - retries); } - rc = sja1105_ptp_reset(priv); + rc = sja1105_ptp_reset(priv->ds); if (rc < 0) dev_err(dev, "Failed to reset PTP clock: %d\n", rc); @@ -517,9 +516,8 @@ static struct sja1105_regs sja1105et_regs = { .rmii_ext_tx_clk = {0x100018, 0x10001F, 0x100026, 0x10002D, 0x100034}, .ptpegr_ts = {0xC0, 0xC2, 0xC4, 0xC6, 0xC8}, .ptp_control = 0x17, - .ptpclk = 0x18, /* Spans 0x18 to 0x19 */ + .ptpclkval = 0x18, /* Spans 0x18 to 0x19 */ .ptpclkrate = 0x1A, - .ptptsclk = 0x1B, /* Spans 0x1B to 0x1C */ }; static struct sja1105_regs sja1105pqrs_regs = { @@ -548,9 +546,8 @@ static struct sja1105_regs sja1105pqrs_regs = { .qlevel = {0x604, 0x614, 0x624, 0x634, 0x644}, .ptpegr_ts = {0xC0, 0xC4, 0xC8, 0xCC, 0xD0}, .ptp_control = 0x18, - .ptpclk = 0x19, + .ptpclkval = 0x19, .ptpclkrate = 0x1B, - .ptptsclk = 0x1C, }; struct sja1105_info sja1105e_info = { diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 16553d92fad2..a3250dcf7d53 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -133,7 +133,7 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) u64 *ptr; int i, j; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { /* Tx stats */ ring = &adapter->tx_ring[i]; @@ -205,7 +205,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset) if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; } @@ -214,7 +214,7 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) const struct ena_stats *ena_stats; int i, j; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { /* Tx stats */ for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { ena_stats = &ena_stats_tx_strings[j]; @@ -333,7 +333,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->tx_ring[i].smoothed_interval = val; } @@ -344,7 +344,7 @@ static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter) val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev); - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->rx_ring[i].smoothed_interval = val; } @@ -612,7 +612,7 @@ static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = adapter->num_queues; + info->data = adapter->num_io_queues; rc = 0; break; case ETHTOOL_GRXFH: @@ -734,14 +734,20 @@ static void ena_get_channels(struct net_device *netdev, { struct ena_adapter *adapter = netdev_priv(netdev); - channels->max_rx = adapter->num_queues; - channels->max_tx = adapter->num_queues; - channels->max_other = 0; - channels->max_combined = 0; - channels->rx_count = adapter->num_queues; - channels->tx_count = adapter->num_queues; - channels->other_count = 0; - channels->combined_count = 0; + channels->max_combined = adapter->max_num_io_queues; + channels->combined_count = adapter->num_io_queues; +} + +static int ena_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + u32 count = channels->combined_count; + /* The check for max value is already done in ethtool */ + if (count < ENA_MIN_NUM_IO_QUEUES) + return -EINVAL; + + return ena_update_queue_count(adapter, count); } static int ena_get_tunable(struct net_device *netdev, @@ -807,6 +813,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_rxfh = ena_get_rxfh, .set_rxfh = ena_set_rxfh, .get_channels = ena_get_channels, + .set_channels = ena_set_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c487d2a7d6dd..d46a912002ff 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -101,7 +101,7 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) adapter->rx_ring[i].mtu = mtu; } @@ -129,10 +129,10 @@ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) u32 i; int rc; - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); if (!adapter->netdev->rx_cpu_rmap) return -ENOMEM; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { int irq_idx = ENA_IO_IRQ_IDX(i); rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, @@ -172,7 +172,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; @@ -294,7 +294,7 @@ static int ena_setup_all_tx_resources(struct ena_adapter *adapter) { int i, rc = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -322,7 +322,7 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_tx_resources(adapter, i); } @@ -428,7 +428,7 @@ static int ena_setup_all_rx_resources(struct ena_adapter *adapter) { int i, rc = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_setup_rx_resources(adapter, i); if (rc) goto err_setup_rx; @@ -456,7 +456,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_rx_resources(adapter, i); } @@ -600,7 +600,7 @@ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) struct ena_ring *rx_ring; int i, rc, bufs_num; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rx_ring = &adapter->rx_ring[i]; bufs_num = rx_ring->ring_size - 1; rc = ena_refill_rx_bufs(rx_ring, bufs_num); @@ -616,7 +616,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_free_rx_bufs(adapter, i); } @@ -688,7 +688,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +699,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -710,7 +710,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { ena_qid = ENA_IO_RXQ_IDX(i); cancel_work_sync(&adapter->ena_napi[i].dim.work); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); @@ -1331,7 +1331,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) * the number of potential io queues is the minimum of what the device * supports and the number of vCPUs. */ -static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +static int ena_enable_msix(struct ena_adapter *adapter) { int msix_vecs, irq_cnt; @@ -1342,7 +1342,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) } /* Reserved the max msix vectors we might need */ - msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues); netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -1359,7 +1359,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) netif_notice(adapter, probe, adapter->netdev, "enable only %d MSI-X (out of %d), reduce the number of queues\n", irq_cnt, msix_vecs); - adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; + adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; } if (ena_init_rx_cpu_rmap(adapter)) @@ -1397,7 +1397,7 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) netdev = adapter->netdev; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1529,7 +1529,7 @@ static void ena_del_napi(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) netif_napi_del(&adapter->ena_napi[i].napi); } @@ -1538,7 +1538,7 @@ static void ena_init_napi(struct ena_adapter *adapter) struct ena_napi *napi; int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, @@ -1555,7 +1555,7 @@ static void ena_napi_disable_all(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_disable(&adapter->ena_napi[i].napi); } @@ -1563,7 +1563,7 @@ static void ena_napi_enable_all(struct ena_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1673,7 +1673,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1741,7 +1741,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rc = ena_create_io_rx_queue(adapter, i); if (rc) goto create_err; @@ -1764,7 +1764,7 @@ static void set_io_rings_size(struct ena_adapter *adapter, { int i; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { adapter->tx_ring[i].ring_size = new_tx_size; adapter->rx_ring[i].ring_size = new_rx_size; } @@ -1902,14 +1902,14 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) ena_unmask_interrupt(&adapter->tx_ring[i], &adapter->rx_ring[i]); /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_io_queues; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1984,13 +1984,13 @@ static int ena_open(struct net_device *netdev) int rc; /* Notify the stack of the actual queue counts. */ - rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues); if (rc) { netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); return rc; } - rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues); if (rc) { netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); return rc; @@ -2043,14 +2043,30 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size) { - bool dev_up; + bool dev_was_up; - dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; ena_init_io_rings(adapter); - return dev_up ? ena_up(adapter) : 0; + return dev_was_up ? ena_up(adapter) : 0; +} + +int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + bool dev_was_up; + + dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_close(adapter->netdev); + adapter->num_io_queues = new_channel_count; + /* We need to destroy the rss table so that the indirection + * table will be reinitialized by ena_up() + */ + ena_com_rss_destroy(ena_dev); + ena_init_io_rings(adapter); + return dev_was_up ? ena_open(adapter->netdev) : 0; } static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) @@ -2495,7 +2511,7 @@ static void ena_get_stats64(struct net_device *netdev, if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) return; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { u64 bytes, packets; tx_ring = &adapter->tx_ring[i]; @@ -2682,14 +2698,13 @@ err_mmio_read_less: return rc; } -static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, - int io_vectors) +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev = adapter->ena_dev; struct device *dev = &adapter->pdev->dev; int rc; - rc = ena_enable_msix(adapter, io_vectors); + rc = ena_enable_msix(adapter); if (rc) { dev_err(dev, "Can not reserve msix vectors\n"); return rc; @@ -2782,8 +2797,7 @@ static int ena_restore_device(struct ena_adapter *adapter) goto err_device_destroy; } - rc = ena_enable_msix_and_set_admin_interrupts(adapter, - adapter->num_queues); + rc = ena_enable_msix_and_set_admin_interrupts(adapter); if (rc) { dev_err(&pdev->dev, "Enable MSI-X failed\n"); goto err_device_destroy; @@ -2948,7 +2962,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2965,7 +2979,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_queues; + adapter->last_monitored_tx_qid = i % adapter->num_io_queues; } /* trigger napi schedule after 2 consecutive detections */ @@ -2995,7 +3009,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter) if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) return; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_io_queues; i++) { rx_ring = &adapter->rx_ring[i]; refill_required = @@ -3137,16 +3151,16 @@ static void ena_timer_service(struct timer_list *t) mod_timer(&adapter->timer_service, jiffies + HZ); } -static int ena_calc_io_queue_num(struct pci_dev *pdev, - struct ena_com_dev *ena_dev, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_calc_max_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) { - int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num; + int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { struct ena_admin_queue_ext_feature_fields *max_queue_ext = &get_feat_ctx->max_queue_ext.max_queue_ext; - io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, + io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num, max_queue_ext->max_rx_cq_num); io_tx_sq_num = max_queue_ext->max_tx_sq_num; @@ -3156,25 +3170,25 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, &get_feat_ctx->max_queues; io_tx_sq_num = max_queues->max_sq_num; io_tx_cq_num = max_queues->max_cq_num; - io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); + io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num); } /* In case of LLQ use the llq fields for the tx SQ/CQ */ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) io_tx_sq_num = get_feat_ctx->llq.max_llq_num; - io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); - io_queue_num = min_t(int, io_queue_num, io_rx_num); - io_queue_num = min_t(int, io_queue_num, io_tx_sq_num); - io_queue_num = min_t(int, io_queue_num, io_tx_cq_num); + max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); + max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); + max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); + max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ - io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); - if (unlikely(!io_queue_num)) { + max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); + if (unlikely(!max_num_io_queues)) { dev_err(&pdev->dev, "The device doesn't have io queues\n"); return -EFAULT; } - return io_queue_num; + return max_num_io_queues; } static int ena_set_queues_placement_policy(struct pci_dev *pdev, @@ -3302,7 +3316,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter) } for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { - val = ethtool_rxfh_indir_default(i, adapter->num_queues); + val = ethtool_rxfh_indir_default(i, adapter->num_io_queues); rc = ena_com_indirect_table_fill_entry(ena_dev, i, ENA_IO_RXQ_IDX(val)); if (unlikely(rc && (rc != -EOPNOTSUPP))) { @@ -3349,7 +3363,7 @@ static void set_default_llq_configurations(struct ena_llq_configurations *llq_co llq_config->llq_ring_entry_size_value = 128; } -static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) +static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) { struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; struct ena_com_dev *ena_dev = ctx->ena_dev; @@ -3358,7 +3372,7 @@ static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) u32 max_tx_queue_size; u32 max_rx_queue_size; - if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { struct ena_admin_queue_ext_feature_fields *max_queue_ext = &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, @@ -3432,11 +3446,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; struct ena_adapter *adapter; - int io_queue_num, bars, rc; struct net_device *netdev; static int adapters_found; + u32 max_num_io_queues; char *queue_type_str; bool wd_state; + int bars, rc; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -3497,27 +3512,20 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) calc_queue_ctx.pdev = pdev; /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity. - * Updated during device initialization with the real granularity - */ + * Updated during device initialization with the real granularity + */ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; - io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); - rc = ena_calc_queue_size(&calc_queue_ctx); - if (rc || io_queue_num <= 0) { + max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx); + rc = ena_calc_io_queue_size(&calc_queue_ctx); + if (rc || !max_num_io_queues) { rc = -EFAULT; goto err_device_destroy; } - dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size. %d LLQ is %s\n", - io_queue_num, - calc_queue_ctx.rx_queue_size, - calc_queue_ctx.tx_queue_size, - (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? - "ENABLED" : "DISABLED"); - /* dev zeroed in init_etherdev */ - netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues); if (!netdev) { dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); rc = -ENOMEM; @@ -3545,7 +3553,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; - adapter->num_queues = io_queue_num; + adapter->num_io_queues = max_num_io_queues; + adapter->max_num_io_queues = max_num_io_queues; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3569,7 +3579,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&adapter->syncp); - rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + rc = ena_enable_msix_and_set_admin_interrupts(adapter); if (rc) { dev_err(&pdev->dev, "Failed to enable and set the admin interrupts\n"); @@ -3611,9 +3621,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) queue_type_str = "Low Latency"; dev_info(&pdev->dev, - "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n", + "%s found at mem %lx, mac addr %pM, Placement policy: %s\n", DEVICE_NAME, (long)pci_resource_start(pdev, 0), - netdev->dev_addr, io_queue_num, queue_type_str); + netdev->dev_addr, queue_type_str); set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 72ee51a82ec7..bffd778f2ce3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -82,6 +82,8 @@ #define ENA_DEFAULT_RING_SIZE (1024) #define ENA_MIN_RING_SIZE (256) +#define ENA_MIN_NUM_IO_QUEUES (1) + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) @@ -161,10 +163,10 @@ struct ena_calc_queue_size_ctx { struct ena_com_dev_get_features_ctx *get_feat_ctx; struct ena_com_dev *ena_dev; struct pci_dev *pdev; - u16 tx_queue_size; - u16 rx_queue_size; - u16 max_tx_queue_size; - u16 max_rx_queue_size; + u32 tx_queue_size; + u32 rx_queue_size; + u32 max_tx_queue_size; + u32 max_rx_queue_size; u16 max_tx_sgl_size; u16 max_rx_sgl_size; }; @@ -324,7 +326,8 @@ struct ena_adapter { u32 rx_copybreak; u32 max_mtu; - int num_queues; + u32 num_io_queues; + u32 max_num_io_queues; int msix_vecs; @@ -387,6 +390,7 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); +int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 97ab0dd25552..035dbb1b2c98 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -511,9 +511,6 @@ static void b44_stats_update(struct b44 *bp) *val++ += br32(bp, reg); } - /* Pad */ - reg += 8*4UL; - for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) { *val++ += br32(bp, reg); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index e664392dccc0..ff1bc0ec2e7c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -16,7 +16,8 @@ #include "bnxt_devlink.h" static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); struct bnxt_fw_health *health = bp->fw_health; @@ -66,7 +67,8 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { }; static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); @@ -84,7 +86,8 @@ struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = { }; static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0f138280315a..4f689fb7a61c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3472,16 +3472,10 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } - if (dn) { + if (dn) macaddr = of_get_mac_address(dn); - if (IS_ERR(macaddr)) { - dev_err(&pdev->dev, "can't find MAC address\n"); - err = -EINVAL; - goto err; - } - } else { + else macaddr = pd->mac_address; - } priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { @@ -3493,7 +3487,12 @@ static int bcmgenet_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); - ether_addr_copy(dev->dev_addr, macaddr); + if (IS_ERR_OR_NULL(macaddr) || !is_valid_ether_addr(macaddr)) { + dev_warn(&pdev->dev, "using random Ethernet MAC\n"); + eth_hw_addr_random(dev); + } else { + ether_addr_copy(dev->dev_addr, macaddr); + } dev->watchdog_timeo = 2 * HZ; dev->ethtool_ops = &bcmgenet_ethtool_ops; dev->netdev_ops = &bcmgenet_netdev_ops; @@ -3608,6 +3607,11 @@ static int bcmgenet_remove(struct platform_device *pdev) return 0; } +static void bcmgenet_shutdown(struct platform_device *pdev) +{ + bcmgenet_remove(pdev); +} + #ifdef CONFIG_PM_SLEEP static int bcmgenet_resume(struct device *d) { @@ -3726,6 +3730,7 @@ static SIMPLE_DEV_PM_OPS(bcmgenet_pm_ops, bcmgenet_suspend, bcmgenet_resume); static struct platform_driver bcmgenet_driver = { .probe = bcmgenet_probe, .remove = bcmgenet_remove, + .shutdown = bcmgenet_shutdown, .driver = { .name = "bcmgenet", .of_match_table = bcmgenet_match, diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index f96a42af1014..af04a2c81adb 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1914,10 +1914,10 @@ static struct platform_driver xgmac_driver = { .driver = { .name = "calxedaxgmac", .of_match_table = xgmac_of_match, + .pm = &xgmac_pm_ops, }, .probe = xgmac_probe, .remove = xgmac_remove, - .driver.pm = &xgmac_pm_ops, }; module_platform_driver(xgmac_driver); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9b7af94a40bb..824310253099 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -90,6 +90,9 @@ struct ftgmac100 { struct mii_bus *mii_bus; struct clk *clk; + /* AST2500/AST2600 RMII ref clock gate */ + struct clk *rclk; + /* Link management */ int cur_speed; int cur_duplex; @@ -1718,20 +1721,41 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) nd->link_up ? "up" : "down"); } -static void ftgmac100_setup_clk(struct ftgmac100 *priv) +static int ftgmac100_setup_clk(struct ftgmac100 *priv) { - priv->clk = devm_clk_get(priv->dev, NULL); - if (IS_ERR(priv->clk)) - return; + struct clk *clk; + int rc; - clk_prepare_enable(priv->clk); + clk = devm_clk_get(priv->dev, NULL /* MACCLK */); + if (IS_ERR(clk)) + return PTR_ERR(clk); + priv->clk = clk; + rc = clk_prepare_enable(priv->clk); + if (rc) + return rc; /* Aspeed specifies a 100MHz clock is required for up to * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz * is sufficient */ - clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : - FTGMAC_100MHZ); + rc = clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : + FTGMAC_100MHZ); + if (rc) + goto cleanup_clk; + + /* RCLK is for RMII, typically used for NCSI. Optional because its not + * necessary if it's the AST2400 MAC, or the MAC is configured for + * RGMII, or the controller is not an ASPEED-based controller. + */ + priv->rclk = devm_clk_get_optional(priv->dev, "RCLK"); + rc = clk_prepare_enable(priv->rclk); + if (!rc) + return 0; + +cleanup_clk: + clk_disable_unprepare(priv->clk); + + return rc; } static int ftgmac100_probe(struct platform_device *pdev) @@ -1853,8 +1877,11 @@ static int ftgmac100_probe(struct platform_device *pdev) goto err_setup_mdio; } - if (priv->is_aspeed) - ftgmac100_setup_clk(priv); + if (priv->is_aspeed) { + err = ftgmac100_setup_clk(priv); + if (err) + goto err_ncsi_dev; + } /* Default ring sizes */ priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES; @@ -1886,8 +1913,10 @@ static int ftgmac100_probe(struct platform_device *pdev) return 0; -err_ncsi_dev: err_register_netdev: + clk_disable_unprepare(priv->rclk); + clk_disable_unprepare(priv->clk); +err_ncsi_dev: ftgmac100_destroy_mdio(netdev); err_setup_mdio: iounmap(priv->base); @@ -1909,6 +1938,7 @@ static int ftgmac100_remove(struct platform_device *pdev) unregister_netdev(netdev); + clk_disable_unprepare(priv->rclk); clk_disable_unprepare(priv->clk); /* There's a small chance the reset task will have been re-queued, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 19379bae0144..90fc79b3fd0a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -221,6 +221,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv, struct dpaa2_eth_channel *ch, dma_addr_t addr) { + int retries = 0; int err; ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr; @@ -229,8 +230,11 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv, while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid, ch->xdp.drop_bufs, - ch->xdp.drop_cnt)) == -EBUSY) + ch->xdp.drop_cnt)) == -EBUSY) { + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + break; cpu_relax(); + } if (err) { free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); @@ -458,7 +462,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch, struct dpaa2_eth_fq *fq = NULL; struct dpaa2_dq *dq; const struct dpaa2_fd *fd; - int cleaned = 0; + int cleaned = 0, retries = 0; int is_last; do { @@ -469,6 +473,11 @@ static int consume_frames(struct dpaa2_eth_channel *ch, * the store until we get some sort of valid response * token (either a valid frame or an "empty dequeue") */ + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) { + netdev_err_once(priv->net_dev, + "Unable to read a valid dequeue response\n"); + return -ETIMEDOUT; + } continue; } @@ -477,6 +486,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch, fq->consume(priv, ch, fd, fq); cleaned++; + retries = 0; } while (!is_last); if (!cleaned) @@ -949,6 +959,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; struct page *page; dma_addr_t addr; + int retries = 0; int i, err; for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { @@ -980,8 +991,11 @@ static int add_bufs(struct dpaa2_eth_priv *priv, release_bufs: /* In case the portal is busy, retry until successful */ while ((err = dpaa2_io_service_release(ch->dpio, bpid, - buf_array, i)) == -EBUSY) + buf_array, i)) == -EBUSY) { + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + break; cpu_relax(); + } /* If release command failed, clean up and bail out; * not much else we can do about it @@ -1032,16 +1046,21 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) static void drain_bufs(struct dpaa2_eth_priv *priv, int count) { u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; + int retries = 0; int ret; do { ret = dpaa2_io_service_acquire(NULL, priv->bpid, buf_array, count); if (ret < 0) { + if (ret == -EBUSY && + retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) + continue; netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); return; } free_bufs(priv, buf_array, ret); + retries = 0; } while (ret); } @@ -1094,7 +1113,7 @@ static int pull_channel(struct dpaa2_eth_channel *ch) ch->store); dequeues++; cpu_relax(); - } while (err == -EBUSY); + } while (err == -EBUSY && dequeues < DPAA2_ETH_SWP_BUSY_RETRIES); ch->stats.dequeue_portal_busy += dequeues; if (unlikely(err)) @@ -1118,6 +1137,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) struct netdev_queue *nq; int store_cleaned, work_done; struct list_head rx_list; + int retries = 0; int err; ch = container_of(napi, struct dpaa2_eth_channel, napi); @@ -1136,7 +1156,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) refill_pool(priv, ch, priv->bpid); store_cleaned = consume_frames(ch, &fq); - if (!store_cleaned) + if (store_cleaned <= 0) break; if (fq->type == DPAA2_RX_FQ) { rx_cleaned += store_cleaned; @@ -1163,7 +1183,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) do { err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx); cpu_relax(); - } while (err == -EBUSY); + } while (err == -EBUSY && retries++ < DPAA2_ETH_SWP_BUSY_RETRIES); WARN_ONCE(err, "CDAN notifications rearm failed on core %d", ch->nctx.desired_cpu); @@ -2046,7 +2066,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv) { struct fsl_mc_device *dpcon; struct device *dev = priv->net_dev->dev.parent; - struct dpcon_attr attrs; int err; err = fsl_mc_object_allocate(to_fsl_mc_device(dev), @@ -2071,12 +2090,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv) goto close; } - err = dpcon_get_attributes(priv->mc_io, 0, dpcon->mc_handle, &attrs); - if (err) { - dev_err(dev, "dpcon_get_attributes() failed\n"); - goto close; - } - err = dpcon_enable(priv->mc_io, 0, dpcon->mc_handle); if (err) { dev_err(dev, "dpcon_enable() failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 8a0e65b3267f..686b651edcb2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -245,6 +245,14 @@ static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa) */ #define DPAA2_ETH_ENQUEUE_RETRIES 10 +/* Number of times to retry DPIO portal operations while waiting + * for portal to finish executing current command and become + * available. We want to avoid being stuck in a while loop in case + * hardware becomes unresponsive, but not give up too easily if + * the portal really is busy for valid reasons + */ +#define DPAA2_ETH_SWP_BUSY_RETRIES 1000 + /* Driver statistics, other than those in struct rtnl_link_stats64. * These are usually collected per-CPU and aggregated by ethtool. */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 0aa1c34019bb..dc9a6c36cac0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -216,7 +216,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, if (err == -EINVAL) /* Older firmware versions don't support all pages */ memset(&dpni_stats, 0, sizeof(dpni_stats)); - else + else if (err) netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j); num_cnt = dpni_stats_page_size[j] / sizeof(u64); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 3a14bbc26ea2..1c5243cc1dc6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3049,7 +3049,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) u32 sl; u32 credit; int i; - const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + static const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, @@ -3059,7 +3059,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, }; - const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + static const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index f8a87f8ca983..0059d440e1f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -45,6 +45,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */ HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */ HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */ + HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */ HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 75ccc1e7076b..e48023643f4c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -366,6 +366,19 @@ struct hnae3_ae_dev { * Enable/disable HW GRO * add_arfs_entry * Check the 5-tuples of flow, and create flow director rule + * get_vf_config + * Get the VF configuration setting by the host + * set_vf_link_state + * Set VF link status + * set_vf_spoofchk + * Enable/disable spoof check for specified vf + * set_vf_trust + * Enable/disable trust for specified vf, if the vf being trusted, then + * it can enable promisc mode + * set_vf_rate + * Set the max tx rate of specified vf. + * set_vf_mac + * Configure the default MAC for specified VF */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -531,6 +544,16 @@ struct hnae3_ae_ops { int (*mac_connect_phy)(struct hnae3_handle *handle); void (*mac_disconnect_phy)(struct hnae3_handle *handle); void (*restore_vlan_table)(struct hnae3_handle *handle); + int (*get_vf_config)(struct hnae3_handle *handle, int vf, + struct ifla_vf_info *ivf); + int (*set_vf_link_state)(struct hnae3_handle *handle, int vf, + int link_state); + int (*set_vf_spoofchk)(struct hnae3_handle *handle, int vf, + bool enable); + int (*set_vf_trust)(struct hnae3_handle *handle, int vf, bool enable); + int (*set_vf_rate)(struct hnae3_handle *handle, int vf, + int min_tx_rate, int max_tx_rate, bool force); + int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 616cad0faa21..6e0b2612d92d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -681,7 +681,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, return 0; ret = skb_cow_head(skb, 0); - if (unlikely(ret)) + if (unlikely(ret < 0)) return ret; l3.hdr = skb_network_header(skb); @@ -962,14 +962,6 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, return 0; } -static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) -{ - /* Config bd buffer end */ - if (!!frag_end) - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U); - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U); -} - static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring, struct sk_buff *skb) { @@ -1062,7 +1054,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, skb_reset_mac_len(skb); ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_l4_proto_err++; u64_stats_update_end(&ring->syncp); @@ -1072,7 +1064,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, ret = hns3_set_l2l3l4(skb, ol4_proto, il4_proto, &type_cs_vlan_tso, &ol_type_vlan_len_msec); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_l2l3l4_err++; u64_stats_update_end(&ring->syncp); @@ -1081,7 +1073,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, ret = hns3_set_tso(skb, &paylen, &mss, &type_cs_vlan_tso); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_tso_err++; u64_stats_update_end(&ring->syncp); @@ -1102,9 +1094,10 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - unsigned int size, int frag_end, - enum hns_desc_type type) + unsigned int size, enum hns_desc_type type) { +#define HNS3_LIKELY_BD_NUM 1 + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; struct device *dev = ring_to_dev(ring); @@ -1118,7 +1111,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, int ret; ret = hns3_fill_skb_desc(ring, skb, desc); - if (unlikely(ret)) + if (unlikely(ret < 0)) return ret; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); @@ -1137,19 +1130,16 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->length = size; if (likely(size <= HNS3_MAX_BD_SIZE)) { - u16 bdtp_fe_sc_vld_ra_ri = 0; - desc_cb->priv = priv; desc_cb->dma = dma; desc_cb->type = type; desc->addr = cpu_to_le64(dma); desc->tx.send_size = cpu_to_le16(size); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end); desc->tx.bdtp_fe_sc_vld_ra_ri = - cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + cpu_to_le16(BIT(HNS3_TXD_VLD_B)); ring_ptr_move_fw(ring, next_to_use); - return 0; + return HNS3_LIKELY_BD_NUM; } frag_buf_num = hns3_tx_bd_count(size); @@ -1158,8 +1148,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* When frag size is bigger than hardware limit, split this frag */ for (k = 0; k < frag_buf_num; k++) { - u16 bdtp_fe_sc_vld_ra_ri = 0; - /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ desc_cb->priv = priv; desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; @@ -1170,11 +1158,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, - frag_end && (k == frag_buf_num - 1) ? - 1 : 0); desc->tx.bdtp_fe_sc_vld_ra_ri = - cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + cpu_to_le16(BIT(HNS3_TXD_VLD_B)); /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); @@ -1183,23 +1168,78 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc = &ring->desc[ring->next_to_use]; } - return 0; + return frag_buf_num; } -static unsigned int hns3_nic_bd_num(struct sk_buff *skb) +static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, + unsigned int bd_num) { - unsigned int bd_num; + unsigned int size; int i; - /* if the total len is within the max bd limit */ - if (likely(skb->len <= HNS3_MAX_BD_SIZE)) - return skb_shinfo(skb)->nr_frags + 1; + size = skb_headlen(skb); + while (size > HNS3_MAX_BD_SIZE) { + bd_size[bd_num++] = HNS3_MAX_BD_SIZE; + size -= HNS3_MAX_BD_SIZE; - bd_num = hns3_tx_bd_count(skb_headlen(skb)); + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + if (size) { + bd_size[bd_num++] = size; + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - bd_num += hns3_tx_bd_count(skb_frag_size(frag)); + size = skb_frag_size(frag); + if (!size) + continue; + + while (size > HNS3_MAX_BD_SIZE) { + bd_size[bd_num++] = HNS3_MAX_BD_SIZE; + size -= HNS3_MAX_BD_SIZE; + + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + bd_size[bd_num++] = size; + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + } + + return bd_num; +} + +static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size) +{ + struct sk_buff *frag_skb; + unsigned int bd_num = 0; + + /* If the total len is within the max bd limit */ + if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) && + skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM)) + return skb_shinfo(skb)->nr_frags + 1U; + + /* The below case will always be linearized, return + * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized. + */ + if (unlikely(skb->len > HNS3_MAX_TSO_SIZE || + (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE))) + return HNS3_MAX_TSO_BD_NUM + 1U; + + bd_num = hns3_skb_bd_num(skb, bd_size, bd_num); + + if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; + + skb_walk_frags(skb, frag_skb) { + bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num); + if (bd_num > HNS3_MAX_TSO_BD_NUM) + return bd_num; } return bd_num; @@ -1218,26 +1258,26 @@ static unsigned int hns3_gso_hdr_len(struct sk_buff *skb) * 7 frags to to be larger than gso header len + mss, and the remaining * continuous 7 frags to be larger than MSS except the last 7 frags. */ -static bool hns3_skb_need_linearized(struct sk_buff *skb) +static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, + unsigned int bd_num) { - int bd_limit = HNS3_MAX_BD_NUM_NORMAL - 1; unsigned int tot_len = 0; int i; - for (i = 0; i < bd_limit; i++) - tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); + for (i = 0; i < HNS3_MAX_NON_TSO_BD_NUM - 1U; i++) + tot_len += bd_size[i]; - /* ensure headlen + the first 7 frags is greater than mss + header - * and the first 7 frags is greater than mss. - */ - if (((tot_len + skb_headlen(skb)) < (skb_shinfo(skb)->gso_size + - hns3_gso_hdr_len(skb))) || (tot_len < skb_shinfo(skb)->gso_size)) + /* ensure the first 8 frags is greater than mss + header */ + if (tot_len + bd_size[HNS3_MAX_NON_TSO_BD_NUM - 1U] < + skb_shinfo(skb)->gso_size + hns3_gso_hdr_len(skb)) return true; - /* ensure the remaining continuous 7 buffer is greater than mss */ - for (i = 0; i < (skb_shinfo(skb)->nr_frags - bd_limit - 1); i++) { - tot_len -= skb_frag_size(&skb_shinfo(skb)->frags[i]); - tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i + bd_limit]); + /* ensure every continuous 7 buffer is greater than mss + * except the last one. + */ + for (i = 0; i < bd_num - HNS3_MAX_NON_TSO_BD_NUM; i++) { + tot_len -= bd_size[i]; + tot_len += bd_size[i + HNS3_MAX_NON_TSO_BD_NUM - 1U]; if (tot_len < skb_shinfo(skb)->gso_size) return true; @@ -1249,15 +1289,16 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb) static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct sk_buff **out_skb) { + unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U]; struct sk_buff *skb = *out_skb; unsigned int bd_num; - bd_num = hns3_nic_bd_num(skb); - if (unlikely(bd_num > HNS3_MAX_BD_NUM_NORMAL)) { + bd_num = hns3_tx_bd_num(skb, bd_size); + if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { struct sk_buff *new_skb; - if (skb_is_gso(skb) && bd_num <= HNS3_MAX_BD_NUM_TSO && - !hns3_skb_need_linearized(skb)) + if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && + !hns3_skb_need_linearized(skb, bd_size, bd_num)) goto out; /* manual split the send packet */ @@ -1267,9 +1308,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, dev_kfree_skb_any(skb); *out_skb = new_skb; - bd_num = hns3_nic_bd_num(new_skb); - if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_TSO) || - (!skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_NORMAL)) + bd_num = hns3_tx_bd_count(new_skb->len); + if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || + (!skb_is_gso(new_skb) && + bd_num > HNS3_MAX_NON_TSO_BD_NUM)) return -ENOMEM; u64_stats_update_begin(&ring->syncp); @@ -1314,6 +1356,37 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) } } +static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, + struct sk_buff *skb, enum hns_desc_type type) +{ + unsigned int size = skb_headlen(skb); + int i, ret, bd_num = 0; + + if (size) { + ret = hns3_fill_desc(ring, skb, size, type); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + size = skb_frag_size(frag); + if (!size) + continue; + + ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + return bd_num; +} + netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -1321,58 +1394,54 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) &tx_ring_data(priv, skb->queue_mapping); struct hns3_enet_ring *ring = ring_data->ring; struct netdev_queue *dev_queue; - skb_frag_t *frag; - int next_to_use_head; - int buf_num; - int seg_num; - int size; + int pre_ntu, next_to_use_head; + struct sk_buff *frag_skb; + int bd_num = 0; int ret; - int i; /* Prefetch the data used later */ prefetch(skb->data); - buf_num = hns3_nic_maybe_stop_tx(ring, &skb); - if (unlikely(buf_num <= 0)) { - if (buf_num == -EBUSY) { + ret = hns3_nic_maybe_stop_tx(ring, &skb); + if (unlikely(ret <= 0)) { + if (ret == -EBUSY) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_busy++; u64_stats_update_end(&ring->syncp); goto out_net_tx_busy; - } else if (buf_num == -ENOMEM) { + } else if (ret == -ENOMEM) { u64_stats_update_begin(&ring->syncp); ring->stats.sw_err_cnt++; u64_stats_update_end(&ring->syncp); } - hns3_rl_err(netdev, "xmit error: %d!\n", buf_num); + hns3_rl_err(netdev, "xmit error: %d!\n", ret); goto out_err_tx_ok; } - /* No. of segments (plus a header) */ - seg_num = skb_shinfo(skb)->nr_frags + 1; - /* Fill the first part */ - size = skb_headlen(skb); - next_to_use_head = ring->next_to_use; - ret = hns3_fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0, - DESC_TYPE_SKB); - if (unlikely(ret)) + ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); + if (unlikely(ret < 0)) goto fill_err; - /* Fill the fragments */ - for (i = 1; i < seg_num; i++) { - frag = &skb_shinfo(skb)->frags[i - 1]; - size = skb_frag_size(frag); + bd_num += ret; - ret = hns3_fill_desc(ring, frag, size, - seg_num - 1 == i ? 1 : 0, - DESC_TYPE_PAGE); + if (!skb_has_frag_list(skb)) + goto out; - if (unlikely(ret)) + skb_walk_frags(skb, frag_skb) { + ret = hns3_fill_skb_to_desc(ring, frag_skb, DESC_TYPE_PAGE); + if (unlikely(ret < 0)) goto fill_err; + + bd_num += ret; } +out: + pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) : + (ring->desc_num - 1); + ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= + cpu_to_le16(BIT(HNS3_TXD_FE_B)); /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index); @@ -1380,7 +1449,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) wmb(); /* Commit all data before submit */ - hnae3_queue_xmit(ring->tqp, buf_num); + hnae3_queue_xmit(ring->tqp, bd_num); return NETDEV_TX_OK; @@ -1413,6 +1482,16 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) return 0; } + /* For VF device, if there is a perm_addr, then the user will not + * be allowed to change the address. + */ + if (!hns3_is_phys_func(h->pdev) && + !is_zero_ether_addr(netdev->perm_addr)) { + netdev_err(netdev, "has permanent MAC %pM, user MAC %pM not allow\n", + netdev->perm_addr, mac_addr->sa_data); + return -EPERM; + } + ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false); if (ret) { netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret); @@ -1643,6 +1722,29 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, return ret; } +static int hns3_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + + if (hns3_nic_resetting(netdev)) + return -EBUSY; + + if (!handle->ae_algo->ops->set_vf_spoofchk) + return -EOPNOTSUPP; + + return handle->ae_algo->ops->set_vf_spoofchk(handle, vf, enable); +} + +static int hns3_set_vf_trust(struct net_device *netdev, int vf, bool enable) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + + if (!handle->ae_algo->ops->set_vf_trust) + return -EOPNOTSUPP; + + return handle->ae_algo->ops->set_vf_trust(handle, vf, enable); +} + static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -1805,6 +1907,57 @@ static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } #endif +static int hns3_nic_get_vf_config(struct net_device *ndev, int vf, + struct ifla_vf_info *ivf) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->get_vf_config) + return -EOPNOTSUPP; + + return h->ae_algo->ops->get_vf_config(h, vf, ivf); +} + +static int hns3_nic_set_vf_link_state(struct net_device *ndev, int vf, + int link_state) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->set_vf_link_state) + return -EOPNOTSUPP; + + return h->ae_algo->ops->set_vf_link_state(h, vf, link_state); +} + +static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf, + int min_tx_rate, int max_tx_rate) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (!h->ae_algo->ops->set_vf_rate) + return -EOPNOTSUPP; + + return h->ae_algo->ops->set_vf_rate(h, vf, min_tx_rate, max_tx_rate, + false); +} + +static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo->ops->set_vf_mac) + return -EOPNOTSUPP; + + if (is_multicast_ether_addr(mac)) { + netdev_err(netdev, + "Invalid MAC:%pM specified. Could not set MAC\n", + mac); + return -EINVAL; + } + + return h->ae_algo->ops->set_vf_mac(h, vf_id, mac); +} + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, @@ -1820,10 +1973,15 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid, .ndo_set_vf_vlan = hns3_ndo_set_vf_vlan, + .ndo_set_vf_spoofchk = hns3_set_vf_spoofchk, + .ndo_set_vf_trust = hns3_set_vf_trust, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = hns3_rx_flow_steer, #endif - + .ndo_get_vf_config = hns3_nic_get_vf_config, + .ndo_set_vf_link_state = hns3_nic_set_vf_link_state, + .ndo_set_vf_rate = hns3_nic_set_vf_rate, + .ndo_set_vf_mac = hns3_nic_set_vf_mac, }; bool hns3_is_phys_func(struct pci_dev *pdev) @@ -2069,9 +2227,8 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; - - netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST; netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; @@ -2081,21 +2238,24 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; netdev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC; + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_FRAGLIST; if (pdev->revision >= 0x21) { netdev->hw_features |= NETIF_F_GRO_HW; @@ -2358,7 +2518,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring) netdev_tx_completed_queue(dev_queue, pkts, bytes); if (unlikely(pkts && netif_carrier_ok(netdev) && - (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) { + ring_space(ring) > HNS3_MAX_TSO_BD_NUM)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -3743,23 +3903,24 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv) } /* Set mac addr if it is configured. or leave it to the AE driver */ -static int hns3_init_mac_addr(struct net_device *netdev, bool init) +static int hns3_init_mac_addr(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; u8 mac_addr_temp[ETH_ALEN]; int ret = 0; - if (h->ae_algo->ops->get_mac_addr && init) { + if (h->ae_algo->ops->get_mac_addr) h->ae_algo->ops->get_mac_addr(h, mac_addr_temp); - ether_addr_copy(netdev->dev_addr, mac_addr_temp); - } /* Check if the MAC address is valid, if not get a random one */ - if (!is_valid_ether_addr(netdev->dev_addr)) { + if (!is_valid_ether_addr(mac_addr_temp)) { eth_hw_addr_random(netdev); dev_warn(priv->dev, "using random MAC address %pM\n", netdev->dev_addr); + } else { + ether_addr_copy(netdev->dev_addr, mac_addr_temp); + ether_addr_copy(netdev->perm_addr, mac_addr_temp); } if (h->ae_algo->ops->set_mac_addr) @@ -3863,7 +4024,7 @@ static int hns3_client_init(struct hnae3_handle *handle) handle->kinfo.netdev = netdev; handle->priv = (void *)priv; - hns3_init_mac_addr(netdev, true); + hns3_init_mac_addr(netdev); hns3_set_default_feature(netdev); @@ -4331,7 +4492,7 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle) bool vlan_filter_enable; int ret; - ret = hns3_init_mac_addr(netdev, false); + ret = hns3_init_mac_addr(netdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 2110fa3b4479..c5b7c22263b1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -76,7 +76,7 @@ enum hns3_nic_state { #define HNS3_RING_NAME_LEN 16 #define HNS3_BUFFER_SIZE_2048 2048 #define HNS3_RING_MAX_PENDING 32760 -#define HNS3_RING_MIN_PENDING 24 +#define HNS3_RING_MIN_PENDING 72 #define HNS3_RING_BD_MULTIPLE 8 /* max frame size of mac */ #define HNS3_MAC_MAX_FRAME 9728 @@ -195,9 +195,13 @@ enum hns3_nic_state { #define HNS3_VECTOR_INITED 1 #define HNS3_MAX_BD_SIZE 65535 -#define HNS3_MAX_BD_NUM_NORMAL 8 -#define HNS3_MAX_BD_NUM_TSO 63 -#define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS +#define HNS3_MAX_NON_TSO_BD_NUM 8U +#define HNS3_MAX_TSO_BD_NUM 63U +#define HNS3_MAX_TSO_SIZE \ + (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM) + +#define HNS3_MAX_NON_TSO_SIZE \ + (HNS3_MAX_BD_SIZE * HNS3_MAX_NON_TSO_BD_NUM) #define HNS3_VECTOR_GL0_OFFSET 0x100 #define HNS3_VECTOR_GL1_OFFSET 0x200 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 4821fe08b5e4..3578832067ff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -244,7 +244,7 @@ enum hclge_opcode_type { /* QCN commands */ HCLGE_OPC_QCN_MOD_CFG = 0x1A01, HCLGE_OPC_QCN_GRP_TMPLT_CFG = 0x1A02, - HCLGE_OPC_QCN_SHAPPING_IR_CFG = 0x1A03, + HCLGE_OPC_QCN_SHAPPING_CFG = 0x1A03, HCLGE_OPC_QCN_SHAPPING_BS_CFG = 0x1A04, HCLGE_OPC_QCN_QSET_LINK_CFG = 0x1A05, HCLGE_OPC_QCN_RP_STATUS_GET = 0x1A06, @@ -1090,9 +1090,6 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read); void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read); -int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, - struct hclge_promisc_param *param); - enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw, struct hclge_desc *desc); enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index d0128d792717..0ccc8e7b19d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1110,6 +1110,82 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev) } } +static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid) +{ + struct hclge_qs_shapping_cmd *shap_cfg_cmd; + u8 ir_u, ir_b, ir_s, bs_b, bs_s; + struct hclge_desc desc; + u32 shapping_para; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true); + + shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data; + shap_cfg_cmd->qs_id = cpu_to_le16(qsid); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "qs%u failed to get tx_rate, ret=%d\n", + qsid, ret); + return; + } + + shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para); + ir_b = hclge_tm_get_field(shapping_para, IR_B); + ir_u = hclge_tm_get_field(shapping_para, IR_U); + ir_s = hclge_tm_get_field(shapping_para, IR_S); + bs_b = hclge_tm_get_field(shapping_para, BS_B); + bs_s = hclge_tm_get_field(shapping_para, BS_S); + + dev_info(&hdev->pdev->dev, + "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u\n", + qsid, ir_b, ir_u, ir_s, bs_b, bs_s); +} + +static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev) +{ + struct hnae3_knic_private_info *kinfo; + struct hclge_vport *vport; + int vport_id, i; + + for (vport_id = 0; vport_id <= pci_num_vf(hdev->pdev); vport_id++) { + vport = &hdev->vport[vport_id]; + kinfo = &vport->nic.kinfo; + + dev_info(&hdev->pdev->dev, "qs cfg of vport%d:\n", vport_id); + + for (i = 0; i < kinfo->num_tc; i++) { + u16 qsid = vport->qs_offset + i; + + hclge_dbg_dump_qs_shaper_single(hdev, qsid); + } + } +} + +static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev, + const char *cmd_buf) +{ +#define HCLGE_MAX_QSET_NUM 1024 + + u16 qsid; + int ret; + + ret = kstrtou16(cmd_buf, 0, &qsid); + if (ret) { + hclge_dbg_dump_qs_shaper_all(hdev); + return; + } + + if (qsid >= HCLGE_MAX_QSET_NUM) { + dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-1023]\n", + qsid); + return; + } + + hclge_dbg_dump_qs_shaper_single(hdev, qsid); +} + int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) { #define DUMP_REG "dump reg" @@ -1145,6 +1221,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) &cmd_buf[sizeof("dump ncl_config")]); } else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) { hclge_dbg_dump_mac_tnl_status(hdev); + } else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) { + hclge_dbg_dump_qs_shaper(hdev, + &cmd_buf[sizeof("dump qs shaper")]); } else { dev_info(&hdev->pdev->dev, "unknown command\n"); return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e02e01bd9eff..2f0386dcb2f6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -55,6 +55,8 @@ #define HCLGE_LINK_STATUS_MS 10 +#define HCLGE_VF_VPORT_START_NUM 1 + static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); static void hclge_sync_vlan_filter(struct hclge_dev *hdev); @@ -1194,6 +1196,35 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) hclge_parse_backplane_link_mode(hdev, speed_ability); } +static u32 hclge_get_max_speed(u8 speed_ability) +{ + if (speed_ability & HCLGE_SUPPORT_100G_BIT) + return HCLGE_MAC_SPEED_100G; + + if (speed_ability & HCLGE_SUPPORT_50G_BIT) + return HCLGE_MAC_SPEED_50G; + + if (speed_ability & HCLGE_SUPPORT_40G_BIT) + return HCLGE_MAC_SPEED_40G; + + if (speed_ability & HCLGE_SUPPORT_25G_BIT) + return HCLGE_MAC_SPEED_25G; + + if (speed_ability & HCLGE_SUPPORT_10G_BIT) + return HCLGE_MAC_SPEED_10G; + + if (speed_ability & HCLGE_SUPPORT_1G_BIT) + return HCLGE_MAC_SPEED_1G; + + if (speed_ability & HCLGE_SUPPORT_100M_BIT) + return HCLGE_MAC_SPEED_100M; + + if (speed_ability & HCLGE_SUPPORT_10M_BIT) + return HCLGE_MAC_SPEED_10M; + + return HCLGE_MAC_SPEED_1G; +} + static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { struct hclge_cfg_param_cmd *req; @@ -1364,6 +1395,8 @@ static int hclge_configure(struct hclge_dev *hdev) hclge_parse_link_mode(hdev, cfg.speed_ability); + hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability); + if ((hdev->tc_max > HNAE3_MAX_TC) || (hdev->tc_max < 1)) { dev_warn(&hdev->pdev->dev, "TC num = %d.\n", @@ -1649,6 +1682,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) for (i = 0; i < num_vport; i++) { vport->back = hdev; vport->vport_id = i; + vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO; vport->mps = HCLGE_MAC_DEFAULT_FRAME; vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE; vport->rxvlan_cfg.rx_vlan_offload_en = true; @@ -2871,6 +2905,62 @@ static int hclge_get_status(struct hnae3_handle *handle) return hdev->hw.mac.link; } +static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) +{ + if (pci_num_vf(hdev->pdev) == 0) { + dev_err(&hdev->pdev->dev, + "SRIOV is disabled, can not get vport(%d) info.\n", vf); + return NULL; + } + + if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) { + dev_err(&hdev->pdev->dev, + "vf id(%d) is out of range(0 <= vfid < %d)\n", + vf, pci_num_vf(hdev->pdev)); + return NULL; + } + + /* VF start from 1 in vport */ + vf += HCLGE_VF_VPORT_START_NUM; + return &hdev->vport[vf]; +} + +static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, + struct ifla_vf_info *ivf) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + ivf->vf = vf; + ivf->linkstate = vport->vf_info.link_state; + ivf->spoofchk = vport->vf_info.spoofchk; + ivf->trusted = vport->vf_info.trusted; + ivf->min_tx_rate = 0; + ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ether_addr_copy(ivf->mac, vport->vf_info.mac); + + return 0; +} + +static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, + int link_state) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + vport->vf_info.link_state = link_state; + + return 0; +} + static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) { u32 rst_src_reg, cmdq_src_reg, msix_src_reg; @@ -4577,8 +4667,8 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector, return ret; } -int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, - struct hclge_promisc_param *param) +static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, + struct hclge_promisc_param *param) { struct hclge_promisc_cfg_cmd *req; struct hclge_desc desc; @@ -4605,8 +4695,9 @@ int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, return ret; } -void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, - bool en_mc, bool en_bc, int vport_id) +static void hclge_promisc_param_init(struct hclge_promisc_param *param, + bool en_uc, bool en_mc, bool en_bc, + int vport_id) { if (!param) return; @@ -4621,12 +4712,21 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, param->vf_id = vport_id; } +int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc, + bool en_mc_pmc, bool en_bc_pmc) +{ + struct hclge_dev *hdev = vport->back; + struct hclge_promisc_param param; + + hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, + vport->vport_id); + return hclge_cmd_set_promisc_mode(hdev, ¶m); +} + static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, bool en_mc_pmc) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - struct hclge_promisc_param param; bool en_bc_pmc = true; /* For revision 0x20, if broadcast promisc enabled, vlan filter is @@ -4636,9 +4736,8 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, if (handle->pdev->revision == 0x20) en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false; - hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, - vport->vport_id); - return hclge_cmd_set_promisc_mode(hdev, ¶m); + return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc, + en_bc_pmc); } static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) @@ -7410,6 +7509,67 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, return return_status; } +static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, + u8 *mac_addr) +{ + struct hclge_mac_vlan_tbl_entry_cmd req; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u16 egress_port = 0; + int i; + + if (is_zero_ether_addr(mac_addr)) + return false; + + memset(&req, 0, sizeof(req)); + hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, + HCLGE_MAC_EPORT_VFID_S, vport->vport_id); + req.egress_port = cpu_to_le16(egress_port); + hclge_prepare_mac_addr(&req, mac_addr, false); + + if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT) + return true; + + vf_idx += HCLGE_VF_VPORT_START_NUM; + for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + if (i != vf_idx && + ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) + return true; + + return false; +} + +static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, + u8 *mac_addr) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (ether_addr_equal(mac_addr, vport->vf_info.mac)) { + dev_info(&hdev->pdev->dev, + "Specified MAC(=%pM) is same as before, no change committed!\n", + mac_addr); + return 0; + } + + if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) { + dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n", + mac_addr); + return -EEXIST; + } + + ether_addr_copy(vport->vf_info.mac, mac_addr); + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %pM, and it will be reinitialized!\n", + vf, mac_addr); + + return hclge_inform_reset_assert_to_vf(vport); +} + static int hclge_add_mgr_tbl(struct hclge_dev *hdev, const struct hclge_mac_mgr_tbl_entry_cmd *req) { @@ -7583,6 +7743,8 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, __be16 proto) { #define HCLGE_MAX_VF_BYTES 16 + + struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_vlan_filter_vf_cfg_cmd *req0; struct hclge_vlan_filter_vf_cfg_cmd *req1; struct hclge_desc desc[2]; @@ -7591,10 +7753,18 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, int ret; /* if vf vlan table is full, firmware will close vf vlan filter, it - * is unable and unnecessary to add new vlan id to vf vlan filter + * is unable and unnecessary to add new vlan id to vf vlan filter. + * If spoof check is enable, and vf vlan is full, it shouldn't add + * new vlan, because tx packets with these vlan id will be dropped. */ - if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) + if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) { + if (vport->vf_info.spoofchk && vlan) { + dev_err(&hdev->pdev->dev, + "Can't add vlan due to spoof check is on and vf vlan table is full\n"); + return -EPERM; + } return 0; + } hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); @@ -8091,12 +8261,15 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) } list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { - if (vlan->hd_tbl_status) - hclge_set_vlan_filter_hw(hdev, - htons(ETH_P_8021Q), - vport->vport_id, - vlan->vlan_id, - false); + int ret; + + if (!vlan->hd_tbl_status) + continue; + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, false); + if (ret) + break; } } @@ -9338,6 +9511,219 @@ static void hclge_stats_clear(struct hclge_dev *hdev) memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); } +static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_config_switch_param(hdev, vf, enable, + HCLGE_SWITCH_ANTI_SPOOF_MASK); +} + +static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable) +{ + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, + HCLGE_FILTER_FE_NIC_INGRESS_B, + enable, vf); +} + +static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable) +{ + int ret; + + ret = hclge_set_mac_spoofchk(hdev, vf, enable); + if (ret) { + dev_err(&hdev->pdev->dev, + "Set vf %d mac spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + return ret; + } + + ret = hclge_set_vlan_spoofchk(hdev, vf, enable); + if (ret) + dev_err(&hdev->pdev->dev, + "Set vf %d vlan spoof check %s failed, ret=%d\n", + vf, enable ? "on" : "off", ret); + + return ret; +} + +static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf, + bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_spoofchk = enable ? 1 : 0; + int ret; + + if (hdev->pdev->revision == 0x20) + return -EOPNOTSUPP; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.spoofchk == new_spoofchk) + return 0; + + if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full)) + dev_warn(&hdev->pdev->dev, + "vf %d vlan table is full, enable spoof check may cause its packet send fail\n", + vf); + else if (enable && hclge_is_umv_space_full(vport)) + dev_warn(&hdev->pdev->dev, + "vf %d mac table is full, enable spoof check may cause its packet send fail\n", + vf); + + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable); + if (ret) + return ret; + + vport->vf_info.spoofchk = new_spoofchk; + return 0; +} + +static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + int ret; + int i; + + if (hdev->pdev->revision == 0x20) + return 0; + + /* resume the vf spoof check state after reset */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, + vport->vf_info.spoofchk); + if (ret) + return ret; + + vport++; + } + + return 0; +} + +static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 new_trusted = enable ? 1 : 0; + bool en_bc_pmc; + int ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (vport->vf_info.trusted == new_trusted) + return 0; + + /* Disable promisc mode for VF if it is not trusted any more. */ + if (!enable && vport->vf_info.promisc_enable) { + en_bc_pmc = hdev->pdev->revision != 0x20; + ret = hclge_set_vport_promisc_mode(vport, false, false, + en_bc_pmc); + if (ret) + return ret; + vport->vf_info.promisc_enable = 0; + hclge_inform_vf_promisc_info(vport); + } + + vport->vf_info.trusted = new_trusted; + + return 0; +} + +static void hclge_reset_vf_rate(struct hclge_dev *hdev) +{ + int ret; + int vf; + + /* reset vf rate to default value */ + for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) { + struct hclge_vport *vport = &hdev->vport[vf]; + + vport->vf_info.max_tx_rate = 0; + ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate); + if (ret) + dev_err(&hdev->pdev->dev, + "vf%d failed to reset to default, ret=%d\n", + vf - HCLGE_VF_VPORT_START_NUM, ret); + } +} + +static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf, + int min_tx_rate, int max_tx_rate) +{ + if (min_tx_rate != 0 || + max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) { + dev_err(&hdev->pdev->dev, + "min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n", + min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed); + return -EINVAL; + } + + return 0; +} + +static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf, + int min_tx_rate, int max_tx_rate, bool force) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate); + if (ret) + return ret; + + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + if (!force && max_tx_rate == vport->vf_info.max_tx_rate) + return 0; + + ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate); + if (ret) + return ret; + + vport->vf_info.max_tx_rate = max_tx_rate; + + return 0; +} + +static int hclge_resume_vf_rate(struct hclge_dev *hdev) +{ + struct hnae3_handle *handle = &hdev->vport->nic; + struct hclge_vport *vport; + int ret; + int vf; + + /* resume the vf max_tx_rate after reset */ + for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) { + vport = hclge_get_vf_vport(hdev, vf); + if (!vport) + return -EINVAL; + + /* zero means max rate, after reset, firmware already set it to + * max rate, so just continue. + */ + if (!vport->vf_info.max_tx_rate) + continue; + + ret = hclge_set_vf_rate(handle, vf, 0, + vport->vf_info.max_tx_rate, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%d failed to resume tx_rate:%u, ret=%d\n", + vf, vport->vf_info.max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_reset_vport_state(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; @@ -9437,6 +9823,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) } hclge_reset_vport_state(hdev); + ret = hclge_reset_vport_spoofchk(hdev); + if (ret) + return ret; + + ret = hclge_resume_vf_rate(hdev); + if (ret) + return ret; dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", HCLGE_DRIVER_NAME); @@ -9449,6 +9842,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) struct hclge_dev *hdev = ae_dev->priv; struct hclge_mac *mac = &hdev->hw.mac; + hclge_reset_vf_rate(hdev); hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); @@ -10171,6 +10565,12 @@ static const struct hnae3_ae_ops hclge_ops = { .mac_connect_phy = hclge_mac_connect_phy, .mac_disconnect_phy = hclge_mac_disconnect_phy, .restore_vlan_table = hclge_restore_vlan_table, + .get_vf_config = hclge_get_vf_config, + .set_vf_link_state = hclge_set_vf_link_state, + .set_vf_spoofchk = hclge_set_vf_spoofchk, + .set_vf_trust = hclge_set_vf_trust, + .set_vf_rate = hclge_set_vf_rate, + .set_vf_mac = hclge_set_vf_mac, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index c3d56b872ed7..9e59f0e074be 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -258,6 +258,7 @@ struct hclge_mac { u8 support_autoneg; u8 speed_type; /* 0: sfp speed, 1: active speed */ u32 speed; + u32 max_speed; u32 speed_ability; /* speed ability supported by current media */ u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */ u32 fec_mode; /* active fec mode */ @@ -886,6 +887,15 @@ struct hclge_port_base_vlan_config { struct hclge_vlan_info vlan_info; }; +struct hclge_vf_info { + int link_state; + u8 mac[ETH_ALEN]; + u32 spoofchk; + u32 max_tx_rate; + u32 trusted; + u16 promisc_enable; +}; + struct hclge_vport { u16 alloc_tqps; /* Allocated Tx/Rx queues */ @@ -917,15 +927,15 @@ struct hclge_vport { unsigned long state; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ + struct hclge_vf_info vf_info; struct list_head uc_mac_list; /* Store VF unicast table */ struct list_head mc_mac_list; /* Store VF multicast table */ struct list_head vlan_list; /* Store VF vlan table */ }; -void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, - bool en_mc, bool en_bc, int vport_id); - +int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc, + bool en_mc_pmc, bool en_bc_pmc); int hclge_add_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr); int hclge_rm_uc_addr_common(struct hclge_vport *vport, @@ -994,4 +1004,5 @@ int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc); void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type); +void hclge_inform_vf_promisc_info(struct hclge_vport *vport); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index f5da28a60d00..97463e11aca7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -205,12 +205,38 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *req) { - bool en_bc = req->msg[1] ? true : false; - struct hclge_promisc_param param; +#define HCLGE_MBX_BC_INDEX 1 +#define HCLGE_MBX_UC_INDEX 2 +#define HCLGE_MBX_MC_INDEX 3 - /* vf is not allowed to enable unicast/multicast broadcast */ - hclge_promisc_param_init(¶m, false, false, en_bc, vport->vport_id); - return hclge_cmd_set_promisc_mode(vport->back, ¶m); + bool en_bc = req->msg[HCLGE_MBX_BC_INDEX] ? true : false; + bool en_uc = req->msg[HCLGE_MBX_UC_INDEX] ? true : false; + bool en_mc = req->msg[HCLGE_MBX_MC_INDEX] ? true : false; + int ret; + + if (!vport->vf_info.trusted) { + en_uc = false; + en_mc = false; + } + + ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc); + if (req->mbx_need_resp) + hclge_gen_resp_to_vf(vport, req, ret, NULL, 0); + + vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0; + + return ret; +} + +void hclge_inform_vf_promisc_info(struct hclge_vport *vport) +{ + u8 dest_vfid = (u8)vport->vport_id; + u8 msg_data[2]; + + memcpy(&msg_data[0], &vport->vf_info.promisc_enable, sizeof(u16)); + + hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data), + HCLGE_MBX_PUSH_PROMISC_INFO, dest_vfid); } static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, @@ -223,6 +249,20 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) { const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]); + /* If VF MAC has been configured by the host then it + * cannot be overridden by the MAC specified by the VM. + */ + if (!is_zero_ether_addr(vport->vf_info.mac) && + !ether_addr_equal(mac_addr, vport->vf_info.mac)) { + status = -EPERM; + goto out; + } + + if (!is_valid_ether_addr(mac_addr)) { + status = -EINVAL; + goto out; + } + hclge_rm_uc_addr_common(vport, old_addr); status = hclge_add_uc_addr_common(vport, mac_addr); if (status) { @@ -250,6 +290,7 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, return -EIO; } +out: if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT) hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0); @@ -324,6 +365,9 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, proto = msg_cmd->proto; status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); + if (mbx_req->mbx_need_resp) + return hclge_gen_resp_to_vf(vport, mbx_req, status, + NULL, 0); } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; bool en = msg_cmd->is_kill ? true : false; @@ -398,6 +442,13 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport, HCLGE_TQPS_RSS_INFO_LEN); } +static int hclge_get_vf_mac_addr(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req) +{ + return hclge_gen_resp_to_vf(vport, mbx_req, 0, vport->vf_info.mac, + ETH_ALEN); +} + static int hclge_get_vf_queue_depth(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req, bool gen_resp) @@ -428,6 +479,9 @@ static int hclge_get_vf_media_type(struct hclge_vport *vport, static int hclge_get_link_info(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { +#define HCLGE_VF_LINK_STATE_UP 1U +#define HCLGE_VF_LINK_STATE_DOWN 0U + struct hclge_dev *hdev = vport->back; u16 link_status; u8 msg_data[8]; @@ -435,7 +489,19 @@ static int hclge_get_link_info(struct hclge_vport *vport, u16 duplex; /* mac.link can only be 0 or 1 */ - link_status = (u16)hdev->hw.mac.link; + switch (vport->vf_info.link_state) { + case IFLA_VF_LINK_STATE_ENABLE: + link_status = HCLGE_VF_LINK_STATE_UP; + break; + case IFLA_VF_LINK_STATE_DISABLE: + link_status = HCLGE_VF_LINK_STATE_DOWN; + break; + case IFLA_VF_LINK_STATE_AUTO: + default: + link_status = (u16)hdev->hw.mac.link; + break; + } + duplex = hdev->hw.mac.duplex; memcpy(&msg_data[0], &link_status, sizeof(u16)); memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32)); @@ -749,6 +815,13 @@ void hclge_mbx_handler(struct hclge_dev *hdev) case HCLGE_MBX_PUSH_LINK_STATUS: hclge_handle_link_change_event(hdev, req); break; + case HCLGE_MBX_GET_MAC_ADDR: + ret = hclge_get_vf_mac_addr(vport, req); + if (ret) + dev_err(&hdev->pdev->dev, + "PF failed(%d) to get MAC for VF\n", + ret); + break; case HCLGE_MBX_NCSI_ERROR: hclge_handle_ncsi_error(hdev); break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 62399cc1c5a6..b3c30e5f9aa5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -46,7 +46,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, #define DIVISOR_CLK (1000 * 8) #define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) - const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { + static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { 6 * 256, /* Prioriy level */ 6 * 32, /* Prioriy group level */ 6 * 8, /* Port level */ @@ -511,6 +511,49 @@ static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc, u8 grp_id, return hclge_cmd_send(&hdev->hw, &desc, 1); } +int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) +{ + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + struct hclge_qs_shapping_cmd *shap_cfg_cmd; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u8 ir_b, ir_u, ir_s; + u32 shaper_para; + int ret, i; + + if (!max_tx_rate) + max_tx_rate = HCLGE_ETHER_MAX_RATE; + + ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + + for (i = 0; i < kinfo->num_tc; i++) { + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, + false); + + shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data; + shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i); + shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "vf%d, qs%u failed to set tx_rate:%d, ret=%d\n", + vport->vport_id, shap_cfg_cmd->qs_id, + max_tx_rate, ret); + return ret; + } + } + + return 0; +} + static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 818610988d34..95ef6e1204cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -96,6 +96,12 @@ struct hclge_pg_shapping_cmd { __le32 pg_shapping_para; }; +struct hclge_qs_shapping_cmd { + __le16 qs_id; + u8 rsvd[2]; + __le32 qs_shapping_para; +}; + #define HCLGE_BP_GRP_NUM 32 #define HCLGE_BP_SUB_GRP_ID_S 0 #define HCLGE_BP_SUB_GRP_ID_M GENMASK(4, 0) @@ -154,4 +160,6 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); +int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7d7e712691b9..408e38644c60 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1113,6 +1113,7 @@ static int hclgevf_put_vector(struct hnae3_handle *handle, int vector) } static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, + bool en_uc_pmc, bool en_mc_pmc, bool en_bc_pmc) { struct hclge_mbx_vf_to_pf_cmd *req; @@ -1120,10 +1121,11 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, int ret; req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; - hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE; req->msg[1] = en_bc_pmc ? 1 : 0; + req->msg[2] = en_uc_pmc ? 1 : 0; + req->msg[3] = en_mc_pmc ? 1 : 0; ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -1133,9 +1135,17 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, return ret; } -static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc) +static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, + bool en_mc_pmc) { - return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc); + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct pci_dev *pdev = hdev->pdev; + bool en_bc_pmc; + + en_bc_pmc = pdev->revision != 0x20; + + return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc, + en_bc_pmc); } static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, @@ -1174,11 +1184,37 @@ static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) } } +static int hclgevf_get_host_mac_addr(struct hclgevf_dev *hdev, u8 *p) +{ + u8 host_mac[ETH_ALEN]; + int status; + + status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MAC_ADDR, 0, NULL, 0, + true, host_mac, ETH_ALEN); + if (status) { + dev_err(&hdev->pdev->dev, + "fail to get VF MAC from host %d", status); + return status; + } + + ether_addr_copy(p, host_mac); + + return 0; +} + static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + u8 host_mac_addr[ETH_ALEN]; - ether_addr_copy(p, hdev->hw.mac.mac_addr); + if (hclgevf_get_host_mac_addr(hdev, host_mac_addr)) + return; + + hdev->has_pf_mac = !is_zero_ether_addr(host_mac_addr); + if (hdev->has_pf_mac) + ether_addr_copy(p, host_mac_addr); + else + ether_addr_copy(p, hdev->hw.mac.mac_addr); } static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p, @@ -1275,7 +1311,7 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, memcpy(&msg_data[3], &proto, sizeof(proto)); ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, HCLGE_MBX_VLAN_FILTER, msg_data, - HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0); + HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0); /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence @@ -2648,12 +2684,6 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } - if (pdev->revision >= 0x21) { - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - return ret; - } - dev_info(&hdev->pdev->dev, "Reset done\n"); return 0; @@ -2728,17 +2758,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) goto err_config; - /* vf is not allowed to enable unicast/multicast promisc mode. - * For revision 0x20, default to disable broadcast promisc mode, - * firmware makes sure broadcast packets can be accepted. - * For revision 0x21, default to enable broadcast promisc mode. - */ - if (pdev->revision >= 0x21) { - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - goto err_config; - } - /* Initialize RSS for this VF */ ret = hclgevf_rss_init_hw(hdev); if (ret) { @@ -3152,6 +3171,7 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_global_queue_id = hclgevf_get_qid_global, .set_timer_task = hclgevf_set_timer_task, .get_link_mode = hclgevf_get_link_mode, + .set_promisc_mode = hclgevf_set_promisc_mode, }; static struct hnae3_ae_algo ae_algovf = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 2b8d6bc6d224..ef86155de9e0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -266,6 +266,7 @@ struct hclgevf_dev { u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ u8 hw_tc_map; + u8 has_pf_mac; u16 num_msi; u16 num_msi_left; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index a108191c9e50..72bacf89f09c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -205,6 +205,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) case HCLGE_MBX_ASSERTING_RESET: case HCLGE_MBX_LINK_STAT_MODE: case HCLGE_MBX_PUSH_VLAN_INFO: + case HCLGE_MBX_PUSH_PROMISC_INFO: /* set this mbx event as pending. This is required as we * might loose interrupt event when mbx task is busy * handling. This shall be cleared when mbx task just @@ -248,6 +249,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) crq->next_to_use); } +static void hclgevf_parse_promisc_info(struct hclgevf_dev *hdev, + u16 promisc_info) +{ + if (!promisc_info) + dev_info(&hdev->pdev->dev, + "Promisc mode is closed by host for being untrusted.\n"); +} + void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) { enum hnae3_reset_type reset_type; @@ -313,6 +322,9 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) hclgevf_update_port_base_vlan_info(hdev, state, (u8 *)vlan_info, 8); break; + case HCLGE_MBX_PUSH_PROMISC_INFO: + hclgevf_parse_promisc_info(hdev, msg_q[1]); + break; default: dev_err(&hdev->pdev->dev, "fetched unsupported(%d) message from arq\n", diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 111b3b8239e1..5fea65256b9d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2863,7 +2863,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status, skb->ip_summed = CHECKSUM_NONE; } -/* Reuse skb if possible, or allocate a new skb and add it to BM pool */ +/* Allocate a new skb and add it to BM pool */ static int mvpp2_rx_refill(struct mvpp2_port *port, struct mvpp2_bm_pool *bm_pool, int pool) { @@ -2871,7 +2871,6 @@ static int mvpp2_rx_refill(struct mvpp2_port *port, phys_addr_t phys_addr; void *buf; - /* No recycle or too many buffers are in use, so allocate a new skb */ buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr, GFP_ATOMIC); if (!buf) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index fce9b3a24347..22c72fb7206a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3935,13 +3935,17 @@ static void mlx4_restart_one_down(struct pci_dev *pdev); static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, struct devlink *devlink); -static int mlx4_devlink_reload_down(struct devlink *devlink, +static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); struct mlx4_dev *dev = &priv->dev; struct mlx4_dev_persistent *persist = dev->persist; + if (netns_change) { + NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); + return -EOPNOTSUPP; + } if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); mlx4_restart_one_down(persist->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index b860569d4247..6c72b592315b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -222,7 +222,8 @@ static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) } static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) + void *context, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_err_ctx *err_ctx = context; @@ -301,7 +302,8 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, } static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_params *params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index bfed558637c2..b468549e96ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -135,7 +135,8 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) } static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) + void *context, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_err_ctx *err_ctx = context; @@ -205,7 +206,8 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, } static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d685122d9ff7..be3c3c704bfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -390,7 +390,8 @@ static void print_health_info(struct mlx5_core_dev *dev) static int mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); struct mlx5_core_health *health = &dev->priv.health; @@ -491,7 +492,8 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, static int mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); int err; @@ -545,7 +547,8 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { static int mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); @@ -555,7 +558,8 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, #define MLX5_CR_DUMP_CHUNK_SIZE 256 static int mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); u32 crdump_size = dev->priv.health.crdump_size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 5d20d615663e..13e2944b1274 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -248,9 +248,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct net_device *fib_dev; struct fib_info *fi; - if (!net_eq(info->net, &init_net)) - return NOTIFY_DONE; - if (info->family != AF_INET) return NOTIFY_DONE; @@ -311,8 +308,8 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) return 0; mp->fib_nb.notifier_call = mlx5_lag_fib_event; - err = register_fib_notifier(&mp->fib_nb, - mlx5_lag_fib_event_flush); + err = register_fib_notifier(&init_net, &mp->fib_nb, + mlx5_lag_fib_event_flush, NULL); if (err) mp->fib_nb.notifier_call = NULL; @@ -326,6 +323,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) if (!mp->fib_nb.notifier_call) return; - unregister_fib_notifier(&mp->fib_nb); + unregister_fib_notifier(&init_net, &mp->fib_nb); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 14dcc786926d..2b59f84b14f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -127,6 +127,16 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_res_query_enabled); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev) +{ + return rev->minor > req_rev->minor || + (rev->minor == req_rev->minor && + rev->subminor >= req_rev->subminor); +} +EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; @@ -985,6 +995,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, static int mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + bool netns_change, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); @@ -1005,7 +1016,7 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, return mlxsw_core_bus_device_register(mlxsw_core->bus_info, mlxsw_core->bus, mlxsw_core->bus_priv, true, - devlink); + devlink, extack); } static int mlxsw_devlink_flash_update(struct devlink *devlink, @@ -1098,7 +1109,8 @@ static int __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; @@ -1172,7 +1184,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); if (err) goto err_driver_init; } @@ -1223,14 +1235,16 @@ err_devlink_alloc: int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { bool called_again = false; int err; again: err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus, - bus_priv, reload, devlink); + bus_priv, reload, + devlink, extack); /* -EAGAIN is returned in case the FW was updated. FW needs * a reset, so lets try to call __mlxsw_core_bus_device_register() * again. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 5d7d2ab6d155..f25037074e2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/workqueue.h> +#include <linux/net_namespace.h> #include <net/devlink.h> #include "trap.h" @@ -23,6 +24,7 @@ struct mlxsw_core_port; struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +struct mlxsw_fw_rev; unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); @@ -30,13 +32,18 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink); + struct devlink *devlink, + struct netlink_ext_ack *extack); void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload); struct mlxsw_tx_info { @@ -252,7 +259,8 @@ struct mlxsw_driver { const char *kind; size_t priv_size; int (*init)(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info); + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack); void (*fini)(struct mlxsw_core *mlxsw_core); int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core); int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port, @@ -350,6 +358,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, #define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id) \ mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id) +static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core) +{ + return devlink_net(priv_to_devlink(mlxsw_core)); +} + #define MLXSW_BUS_F_TXRX BIT(0) #define MLXSW_BUS_F_RESET BIT(1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 5b00726c4346..9bf8da5f6daf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -41,7 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; - u8 module_sensor_count; + u8 module_sensor_max; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -56,7 +56,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -79,7 +79,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -109,7 +109,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, return -EINVAL; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -336,7 +336,7 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; int index = mlwsw_hwmon_attr->type_index - - mlxsw_hwmon->module_sensor_count + 1; + mlxsw_hwmon->module_sensor_max + 1; return sprintf(buf, "gearbox %03u\n", index); } @@ -528,51 +528,45 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) { - unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); - char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0}; - int i, index; - u8 width; - int err; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 module_sensor_max; + int i, err; if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) return 0; + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &module_sensor_max); + /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before * sensor_count are already utilized by the sensors connected through * mtmp register by mlxsw_hwmon_temp_init(). */ - index = mlxsw_hwmon->sensor_count; - for (i = 1; i < module_count; i++) { - mlxsw_reg_pmlp_pack(pmlp_pl, i); - err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), - pmlp_pl); - if (err) { - dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", - i); - return err; - } - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - continue; + mlxsw_hwmon->module_sensor_max = mlxsw_hwmon->sensor_count + + module_sensor_max; + for (i = mlxsw_hwmon->sensor_count; + i < mlxsw_hwmon->module_sensor_max; i++) { mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, - index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, - index, index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i, + i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, - index, index); - index++; + i, i); } - mlxsw_hwmon->module_sensor_count = index; return 0; } @@ -590,14 +584,14 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL, NULL); if (!gbox_num) return 0; - index = mlxsw_hwmon->module_sensor_count; - max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + index = mlxsw_hwmon->module_sensor_max; + max_index = mlxsw_hwmon->module_sensor_max + gbox_num; while (index < max_index) { - sensor_index = index % mlxsw_hwmon->module_sensor_count + + sensor_index = index % mlxsw_hwmon->module_sensor_max + MLXSW_REG_MTMP_GBOX_INDEX_MIN; mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 35a1dc89c28a..c721b171bd8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -112,6 +112,7 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + u8 tz_module_num; struct mlxsw_thermal_module *tz_gearbox_arr; u8 tz_gearbox_num; unsigned int tz_highest_score; @@ -775,23 +776,10 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) static int mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal *thermal, u8 local_port) + struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; - char pmlp_pl[MLXSW_REG_PMLP_LEN]; - u8 width, module; - int err; - - mlxsw_reg_pmlp_pack(pmlp_pl, local_port); - err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl); - if (err) - return err; - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - return 0; - - module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) @@ -819,26 +807,34 @@ static int mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(core); struct mlxsw_thermal_module *module_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; if (!mlxsw_core_res_query_enabled(core)) return 0; - thermal->tz_module_arr = kcalloc(module_count, + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &thermal->tz_module_num); + + thermal->tz_module_arr = kcalloc(thermal->tz_module_num, sizeof(*thermal->tz_module_arr), GFP_KERNEL); if (!thermal->tz_module_arr) return -ENOMEM; - for (i = 1; i < module_count; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { err = mlxsw_thermal_module_init(dev, core, thermal, i); if (err) goto err_unreg_tz_module_arr; } - for (i = 0; i < module_count - 1; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { module_tz = &thermal->tz_module_arr[i]; if (!module_tz->parent) continue; @@ -850,7 +846,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, return 0; err_unreg_tz_module_arr: - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); return err; @@ -859,13 +855,12 @@ err_unreg_tz_module_arr: static void mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(thermal->core); int i; if (!mlxsw_core_res_query_enabled(thermal->core)) return; - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); } @@ -913,7 +908,8 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL, + NULL); if (!thermal->tz_gearbox_num) return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 95f408d0e103..34566eb62c47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -640,7 +640,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, &mlxsw_i2c_bus, mlxsw_i2c, false, - NULL); + NULL, NULL); if (err) { dev_err(&client->dev, "Fail to register core bus\n"); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 471b0ca6d69a..2b543911ae00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -16,6 +16,14 @@ static const char mlxsw_m_driver_name[] = "mlxsw_minimal"; +#define MLXSW_M_FWREV_MINOR 2000 +#define MLXSW_M_FWREV_SUBMINOR 1886 + +static const struct mlxsw_fw_rev mlxsw_m_fw_rev = { + .minor = MLXSW_M_FWREV_MINOR, + .subminor = MLXSW_M_FWREV_SUBMINOR, +}; + struct mlxsw_m_port; struct mlxsw_m { @@ -172,6 +180,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) } SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_m->core)); mlxsw_m_port = netdev_priv(dev); mlxsw_m_port->dev = dev; mlxsw_m_port->mlxsw_m = mlxsw_m; @@ -325,8 +334,27 @@ static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m) kfree(mlxsw_m->ports); } +static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev; + + /* Validate driver and FW are compatible. + * Do not check major version, since it defines chip type, while + * driver is supposed to support any type. + */ + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev)) + return 0; + + dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, rev->major, + mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor); + + return -EINVAL; +} + static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -334,6 +362,10 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, mlxsw_m->core = mlxsw_core; mlxsw_m->bus_info = mlxsw_bus_info; + err = mlxsw_m_fw_rev_validate(mlxsw_m); + if (err) + return err; + err = mlxsw_m_base_mac_get(mlxsw_m); if (err) { dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 615455a21567..914c33e46fb4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -284,15 +284,18 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { + int tclass; int i; int err; q->producer_counter = 0; q->consumer_counter = 0; + tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC : + MLXSW_PCI_SDQ_CTL_TC; /* Set CQ of same number of this SDQ. */ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); - mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); @@ -963,6 +966,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); if (num_sdqs + num_rdqs > num_cqs || + num_sdqs < MLXSW_PCI_SDQS_MIN || num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { dev_err(&pdev->dev, "Unsupported number of queues\n"); return -EINVAL; @@ -1520,7 +1524,15 @@ static struct mlxsw_pci_queue * mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, const struct mlxsw_tx_info *tx_info) { - u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci); + u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1; + u8 sdqn; + + if (tx_info->is_emad) { + sdqn = MLXSW_PCI_SDQ_EMAD_INDEX; + } else { + BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0); + sdqn = 1 + (tx_info->local_port % ctl_sdq_count); + } return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); } @@ -1790,7 +1802,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci, false, - NULL); + NULL, NULL); if (err) { dev_err(&pdev->dev, "cannot register bus device\n"); goto err_bus_device_register; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e57e42e2d2b2..2b3aec482742 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -51,6 +51,11 @@ #define MLXSW_PCI_EQ_ASYNC_NUM 0 #define MLXSW_PCI_EQ_COMP_NUM 1 +#define MLXSW_PCI_SDQS_MIN 2 /* EMAD and control traffic */ +#define MLXSW_PCI_SDQ_EMAD_INDEX 0 +#define MLXSW_PCI_SDQ_EMAD_TC 0 +#define MLXSW_PCI_SDQ_CTL_TC 3 + #define MLXSW_PCI_AQ_PAGES 8 #define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5494cf93f34c..f5e39758c6ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4111,6 +4111,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4 BIT(9) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2 BIT(10) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4 BIT(12) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8 BIT(15) /* reg_ptys_ext_eth_proto_cap * Extended Ethernet port supported speeds and protocols. @@ -9531,6 +9532,12 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); +/* num_of_modules + * Number of modules. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8); + static inline void mlxsw_reg_mgpir_pack(char *payload) { MLXSW_REG_ZERO(mgpir, payload); @@ -9539,7 +9546,7 @@ static inline void mlxsw_reg_mgpir_pack(char *payload) static inline void mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, enum mlxsw_reg_mgpir_device_type *device_type, - u8 *devices_per_flash) + u8 *devices_per_flash, u8 *num_of_modules) { if (num_of_devices) *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); @@ -9548,6 +9555,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, if (devices_per_flash) *devices_per_flash = mlxsw_reg_mgpir_devices_per_flash_get(payload); + if (num_of_modules) + *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); } /* TNGCR - Tunneling NVE General Configuration Register diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dcf9562bce8a..1275d21e8fbd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -409,9 +409,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) } if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && - (rev->minor > req_rev->minor || - (rev->minor == req_rev->minor && - rev->subminor >= req_rev->subminor))) + mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) return 0; dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", @@ -2914,9 +2912,22 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_400gaui_8[] = { + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8) + #define MLXSW_SP_PORT_MASK_WIDTH_1X BIT(0) #define MLXSW_SP_PORT_MASK_WIDTH_2X BIT(1) #define MLXSW_SP_PORT_MASK_WIDTH_4X BIT(2) +#define MLXSW_SP_PORT_MASK_WIDTH_8X BIT(3) static u8 mlxsw_sp_port_mask_width_get(u8 width) { @@ -2927,6 +2938,8 @@ static u8 mlxsw_sp_port_mask_width_get(u8 width) return MLXSW_SP_PORT_MASK_WIDTH_2X; case 4: return MLXSW_SP_PORT_MASK_WIDTH_4X; + case 8: + return MLXSW_SP_PORT_MASK_WIDTH_8X; default: WARN_ON_ONCE(1); return 0; @@ -2948,7 +2961,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100, }, { @@ -2957,7 +2971,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_1000, }, { @@ -2966,7 +2981,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_2500, }, { @@ -2975,7 +2991,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_5000, }, { @@ -2984,14 +3001,16 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_40000, }, { @@ -3000,7 +3019,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_25000, }, { @@ -3008,7 +3028,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X, + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_50000, }, { @@ -3022,7 +3043,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100000, }, { @@ -3036,9 +3058,17 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_200000, }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8, + .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_8, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_400000, + }, }; #define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode) @@ -3635,6 +3665,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_etherdev; } SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); + dev_net_set(dev, mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_port = netdev_priv(dev); mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; @@ -4738,7 +4769,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -4831,7 +4863,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_router_init(mlxsw_sp); + err = mlxsw_sp_router_init(mlxsw_sp, extack); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); goto err_router_init; @@ -4864,7 +4896,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, * respin. */ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; - err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); goto err_netdev_notifier; @@ -4887,7 +4920,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); err_netdev_notifier: if (mlxsw_sp->clock) mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); @@ -4924,7 +4958,8 @@ err_fids_init: } static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -4944,11 +4979,12 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -4964,7 +5000,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) @@ -4973,7 +5009,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (mlxsw_sp->clock) { mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); @@ -5165,14 +5202,61 @@ static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core) &kvd_size_params); } +static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params span_size_params; + u32 max_span; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SPAN)) + return -EIO; + + max_span = MLXSW_CORE_RES_GET(mlxsw_core, MAX_SPAN); + devlink_resource_size_params_init(&span_size_params, max_span, max_span, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_SPAN, + max_span, MLXSW_SP_RESOURCE_SPAN, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &span_size_params); +} + static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) { - return mlxsw_sp1_resources_kvd_register(mlxsw_core); + int err; + + err = mlxsw_sp1_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + + return 0; + +err_resources_span_register: + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + return err; } static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) { - return mlxsw_sp2_resources_kvd_register(mlxsw_core); + int err; + + err = mlxsw_sp2_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + + return 0; + +err_resources_span_register: + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + return err; } static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b2a0028b1694..a5fdd84b4ca7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -14,6 +14,7 @@ #include <linux/dcbnl.h> #include <linux/in6.h> #include <linux/notifier.h> +#include <linux/net_namespace.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> @@ -47,6 +48,8 @@ #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks" #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks" +#define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -55,6 +58,7 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP_RESOURCE_SPAN, }; struct mlxsw_sp_port; @@ -524,7 +528,8 @@ union mlxsw_sp_l3addr { struct in6_addr addr6; }; -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr); @@ -982,4 +987,9 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); +static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_net(mlxsw_sp->core); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 17f334b46c40..2153bcc4b585 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -870,7 +870,7 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_vni(fid, &vni))) goto out; - nve_dev = dev_get_by_index(&init_net, nve_ifindex); + nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!nve_dev) goto out; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a330b369e899..0e99b64450ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -16,6 +16,7 @@ #include <linux/if_macvlan.h> #include <linux/refcount.h> #include <linux/jhash.h> +#include <linux/net_namespace.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -2551,14 +2552,14 @@ static int mlxsw_sp_router_schedule_work(struct net *net, struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_router *router; - if (!net_eq(net, &init_net)) + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); if (!net_work) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; mlxsw_core_schedule_work(&net_work->work); @@ -6019,12 +6020,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, @@ -6065,12 +6060,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) fib_work->fib6_work.nrt6); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6112,12 +6101,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) &fib_work->ven_info); dev_put(fib_work->ven_info.dev); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6213,7 +6196,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, rule = fr_info->rule; /* Rule only affects locally generated traffic */ - if (rule->iifindex == info->net->loopback_dev->ifindex) + if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex) return 0; switch (info->family) { @@ -6250,8 +6233,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; int err; - if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6 && + if ((info->family != AF_INET && info->family != AF_INET6 && info->family != RTNL_FAMILY_IPMR && info->family != RTNL_FAMILY_IP6MR)) return NOTIFY_DONE; @@ -6263,9 +6245,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, case FIB_EVENT_RULE_DEL: err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); - if (!err || info->extack) - return notifier_from_errno(err); - break; + return notifier_from_errno(err); case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ @@ -7974,9 +7954,10 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); } -static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy; mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); @@ -7991,9 +7972,9 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); } -static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !ip6_multipath_hash_policy(&init_net); + bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); @@ -8021,8 +8002,8 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); mlxsw_reg_recr2_pack(recr2_pl, seed); - mlxsw_sp_mp4_hash_init(recr2_pl); - mlxsw_sp_mp6_hash_init(recr2_pl); + mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl); + mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); } @@ -8053,7 +8034,8 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { - bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; int err; @@ -8079,7 +8061,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; int err; @@ -8155,8 +8138,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) goto err_dscp_init; mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - err = register_fib_notifier(&mlxsw_sp->router->fib_nb, - mlxsw_sp_router_fib_dump_flush); + err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb, + mlxsw_sp_router_fib_dump_flush, extack); if (err) goto err_register_fib_notifier; @@ -8195,7 +8179,8 @@ err_register_inetaddr_notifier: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { - unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 560a60e522f9..200d324e6d99 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -14,8 +14,23 @@ #include "spectrum_span.h" #include "spectrum_switchdev.h" +static u64 mlxsw_sp_span_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + u64 occ = 0; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (mlxsw_sp->span.entries[i].ref_count) + occ++; + } + + return occ; +} + int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) @@ -36,13 +51,19 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) curr->id = i; } + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN, + mlxsw_sp_span_occ_get, mlxsw_sp); + return 0; } void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) { + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; + devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN); + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5ecb45118400..a3af171c6358 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2591,7 +2591,7 @@ __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, if (err) return err; - dev = __dev_get_by_index(&init_net, nve_ifindex); + dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!dev) return -EINVAL; *nve_dev = dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index 0d9356b3f65d..4ff1e623aa76 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -446,7 +446,8 @@ static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) } static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core); int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 1c14c051ee52..de6cb22f68b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -992,6 +992,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, if (!dev) return -ENOMEM; SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core)); mlxsw_sx_port = netdev_priv(dev); mlxsw_sx_port->dev = dev; mlxsw_sx_port->mlxsw_sx = mlxsw_sx; @@ -1563,7 +1564,8 @@ static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) } static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); int err; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5afcb3c4c2ef..c80bb83c8ac9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -3952,7 +3952,7 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta1, *meta2; - const s32 exp_mask[] = { + static const s32 exp_mask[] = { [BPF_B] = 0x000000ffU, [BPF_H] = 0x0000ffffU, [BPF_W] = 0xffffffffU, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 61aabffc8888..bcdcd6de7dea 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -872,7 +872,8 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, /* jump forward, a TX may have gotten lost, need to sync TX */ if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4) - tls_offload_tx_resync_request(nskb->sk); + tls_offload_tx_resync_request(nskb->sk, seq, + ntls->next_seq); *nr_frags = 0; return nskb; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 544012a67221..ebb81d6d4ca1 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -15,6 +15,7 @@ #include <linux/etherdevice.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/phy.h> #include <linux/platform_device.h> @@ -391,6 +392,7 @@ struct rx_status_t { struct netdata_local { struct platform_device *pdev; struct net_device *ndev; + struct device_node *phy_node; spinlock_t lock; void __iomem *net_base; u32 msg_enable; @@ -749,22 +751,26 @@ static void lpc_handle_link_change(struct net_device *ndev) static int lpc_mii_probe(struct net_device *ndev) { struct netdata_local *pldat = netdev_priv(ndev); - struct phy_device *phydev = phy_find_first(pldat->mii_bus); - - if (!phydev) { - netdev_err(ndev, "no PHY found\n"); - return -ENODEV; - } + struct phy_device *phydev; /* Attach to the PHY */ if (lpc_phy_interface_mode(&pldat->pdev->dev) == PHY_INTERFACE_MODE_MII) netdev_info(ndev, "using MII interface\n"); else netdev_info(ndev, "using RMII interface\n"); + + if (pldat->phy_node) + phydev = of_phy_find_device(pldat->phy_node); + else + phydev = phy_find_first(pldat->mii_bus); + if (!phydev) { + netdev_err(ndev, "no PHY found\n"); + return -ENODEV; + } + phydev = phy_connect(ndev, phydev_name(phydev), &lpc_handle_link_change, lpc_phy_interface_mode(&pldat->pdev->dev)); - if (IS_ERR(phydev)) { netdev_err(ndev, "Could not attach to PHY\n"); return PTR_ERR(phydev); @@ -783,6 +789,7 @@ static int lpc_mii_probe(struct net_device *ndev) static int lpc_mii_init(struct netdata_local *pldat) { + struct device_node *node; int err = -ENXIO; pldat->mii_bus = mdiobus_alloc(); @@ -812,7 +819,10 @@ static int lpc_mii_init(struct netdata_local *pldat) platform_set_drvdata(pldat->pdev, pldat->mii_bus); - if (mdiobus_register(pldat->mii_bus)) + node = of_get_child_by_name(pldat->pdev->dev.of_node, "mdio"); + err = of_mdiobus_register(pldat->mii_bus, node); + of_node_put(node); + if (err) goto err_out_unregister_bus; if (lpc_mii_probe(pldat->ndev) != 0) @@ -1345,6 +1355,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) netdev_dbg(ndev, "DMA buffer V address :0x%p\n", pldat->dma_buff_base_v); + pldat->phy_node = of_parse_phandle(np, "phy-handle", 0); + /* Get MAC address from current HW setting (POR state is all zeros) */ __lpc_get_mac(pldat, ndev->dev_addr); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index af1647afa4e8..6fb27dcc5787 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -19,31 +19,30 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); if (err) - goto info_out; + return err; err = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, idev->dev_info.fw_version); if (err) - goto info_out; + return err; snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type); err = devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); if (err) - goto info_out; + return err; snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev); err = devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); if (err) - goto info_out; + return err; err = devlink_info_serial_number_put(req, idev->dev_info.serial_num); -info_out: return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 7d10265f782a..f778fff034f5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -254,12 +254,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; struct ionic_dev *idev; - u32 req_rs, req_fc; - u8 fec_type; int err = 0; idev = &lif->ionic->idev; - fec_type = IONIC_PORT_FEC_TYPE_NONE; /* set autoneg */ if (ks->base.autoneg != idev->port_info->config.an_enable) { @@ -281,29 +278,6 @@ static int ionic_set_link_ksettings(struct net_device *netdev, return err; } - /* set FEC */ - req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS); - req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER); - if (req_rs && req_fc) { - netdev_info(netdev, "Only select one FEC mode at a time\n"); - return -EINVAL; - } else if (req_fc) { - fec_type = IONIC_PORT_FEC_TYPE_FC; - } else if (req_rs) { - fec_type = IONIC_PORT_FEC_TYPE_RS; - } else if (!(req_rs | req_fc)) { - fec_type = IONIC_PORT_FEC_TYPE_NONE; - } - - if (fec_type != idev->port_info->config.fec_type) { - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_fec(idev, fec_type); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); - if (err) - return err; - } - return 0; } @@ -353,6 +327,70 @@ static int ionic_set_pauseparam(struct net_device *netdev, return 0; } +static int ionic_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + switch (lif->ionic->idev.port_info->config.fec_type) { + case IONIC_PORT_FEC_TYPE_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case IONIC_PORT_FEC_TYPE_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + case IONIC_PORT_FEC_TYPE_FC: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + } + + fec->fec = ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS | ETHTOOL_FEC_BASER; + + return 0; +} + +static int ionic_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct ionic_lif *lif = netdev_priv(netdev); + u8 fec_type; + int ret = 0; + + if (lif->ionic->idev.port_info->config.an_enable) { + netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n"); + return -EINVAL; + } + + switch (fec->fec) { + case ETHTOOL_FEC_NONE: + fec_type = IONIC_PORT_FEC_TYPE_NONE; + break; + case ETHTOOL_FEC_OFF: + fec_type = IONIC_PORT_FEC_TYPE_NONE; + break; + case ETHTOOL_FEC_RS: + fec_type = IONIC_PORT_FEC_TYPE_RS; + break; + case ETHTOOL_FEC_BASER: + fec_type = IONIC_PORT_FEC_TYPE_FC; + break; + case ETHTOOL_FEC_AUTO: + default: + netdev_err(netdev, "FEC request 0x%04x not supported\n", + fec->fec); + return -EINVAL; + } + + if (fec_type != lif->ionic->idev.port_info->config.fec_type) { + mutex_lock(&lif->ionic->dev_cmd_lock); + ionic_dev_cmd_port_fec(&lif->ionic->idev, fec_type); + ret = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); + mutex_unlock(&lif->ionic->dev_cmd_lock); + } + + return ret; +} + static int ionic_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) { @@ -372,7 +410,6 @@ static int ionic_set_coalesce(struct net_device *netdev, struct ionic_identity *ident; struct ionic_qcq *qcq; unsigned int i; - u32 usecs; u32 coal; if (coalesce->rx_max_coalesced_frames || @@ -410,26 +447,27 @@ static int ionic_set_coalesce(struct net_device *netdev, return -EINVAL; } + /* Convert the usec request to a HW useable value. If they asked + * for non-zero and it resolved to zero, bump it up + */ coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs); - - if (coal > IONIC_INTR_CTRL_COAL_MAX) - return -ERANGE; - - /* If they asked for non-zero and it resolved to zero, bump it up */ if (!coal && coalesce->rx_coalesce_usecs) coal = 1; - /* Convert it back to get device resolution */ - usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + if (coal > IONIC_INTR_CTRL_COAL_MAX) + return -ERANGE; - if (usecs != lif->rx_coalesce_usecs) { - lif->rx_coalesce_usecs = usecs; + /* Save the new value */ + lif->rx_coalesce_usecs = coalesce->rx_coalesce_usecs; + if (coal != lif->rx_coalesce_hw) { + lif->rx_coalesce_hw = coal; if (test_bit(IONIC_LIF_UP, lif->state)) { for (i = 0; i < lif->nxqs; i++) { qcq = lif->rxqcqs[i].qcq; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - qcq->intr.index, coal); + qcq->intr.index, + lif->rx_coalesce_hw); } } } @@ -453,6 +491,7 @@ static int ionic_set_ringparam(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); bool running; + int err; if (ring->rx_mini_pending || ring->rx_jumbo_pending) { netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n"); @@ -470,8 +509,9 @@ static int ionic_set_ringparam(struct net_device *netdev, ring->rx_pending == lif->nrxq_descs) return 0; - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = test_bit(IONIC_LIF_UP, lif->state); if (running) @@ -504,6 +544,7 @@ static int ionic_set_channels(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); bool running; + int err; if (!ch->combined_count || ch->other_count || ch->rx_count || ch->tx_count) @@ -512,8 +553,9 @@ static int ionic_set_channels(struct net_device *netdev, if (ch->combined_count == lif->nxqs) return 0; - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = test_bit(IONIC_LIF_UP, lif->state); if (running) @@ -747,6 +789,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .get_regs = ionic_get_regs, .get_link = ethtool_op_get_link, .get_link_ksettings = ionic_get_link_ksettings, + .set_link_ksettings = ionic_set_link_ksettings, .get_coalesce = ionic_get_coalesce, .set_coalesce = ionic_set_coalesce, .get_ringparam = ionic_get_ringparam, @@ -769,7 +812,8 @@ static const struct ethtool_ops ionic_ethtool_ops = { .get_module_eeprom = ionic_get_module_eeprom, .get_pauseparam = ionic_get_pauseparam, .set_pauseparam = ionic_set_pauseparam, - .set_link_ksettings = ionic_set_link_ksettings, + .get_fecparam = ionic_get_fecparam, + .set_fecparam = ionic_set_fecparam, .nway_reset = ionic_nway_reset, }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 72107a0627a9..559b96ae48f5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -242,6 +242,21 @@ static int ionic_qcq_disable(struct ionic_qcq *qcq) return ionic_adminq_post_wait(lif, &ctx); } +static void ionic_lif_quiesce(struct ionic_lif *lif) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .attr = IONIC_LIF_ATTR_STATE, + .index = lif->index, + .state = IONIC_LIF_DISABLE + }, + }; + + ionic_adminq_post_wait(lif, &ctx); +} + static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct ionic_dev *idev = &lif->ionic->idev; @@ -1430,7 +1445,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) unsigned int flags; unsigned int i; int err = 0; - u32 coal; flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG; for (i = 0; i < lif->nxqs; i++) { @@ -1447,7 +1461,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) } flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR; - coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs); for (i = 0; i < lif->nxqs; i++) { err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, lif->nrxq_descs, @@ -1460,7 +1473,8 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[i].qcq->intr.index, coal); + lif->rxqcqs[i].qcq->intr.index, + lif->rx_coalesce_hw); ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq, lif->txqcqs[i].qcq); } @@ -1590,6 +1604,7 @@ int ionic_stop(struct net_device *netdev) netif_tx_disable(netdev); ionic_txrx_disable(lif); + ionic_lif_quiesce(lif); ionic_txrx_deinit(lif); ionic_txrx_free(lif); @@ -1619,8 +1634,9 @@ int ionic_reset_queues(struct ionic_lif *lif) /* Put off the next watchdog timeout */ netif_trans_update(lif->netdev); - if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET)) - return -EBUSY; + err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET); + if (err) + return err; running = netif_running(lif->netdev); if (running) @@ -1639,7 +1655,6 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index struct net_device *netdev; struct ionic_lif *lif; int tbl_sz; - u32 coal; int err; netdev = alloc_etherdev_mqs(sizeof(*lif), @@ -1670,8 +1685,9 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index lif->nrxq_descs = IONIC_DEF_TXRX_DESC; /* Convert the default coalesce value to actual hw resolution */ - coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT); - lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal); + lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT; + lif->rx_coalesce_hw = ionic_coal_hw_to_usec(lif->ionic, + lif->rx_coalesce_usecs); snprintf(lif->name, sizeof(lif->name), "lif%u", index); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 6a95b42a8d8c..a55fd1f8c31b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -175,7 +175,9 @@ struct ionic_lif { unsigned long *dbid_inuse; unsigned int dbid_count; struct dentry *dentry; - u32 rx_coalesce_usecs; + u32 rx_coalesce_usecs; /* what the user asked for */ + u32 rx_coalesce_hw; /* what the hw is using */ + u32 flags; struct work_struct tx_timeout_work; }; @@ -187,15 +189,10 @@ struct ionic_lif { #define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) #define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) +/* return 0 if successfully set the bit, else non-zero */ static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname) { - unsigned long tlimit = jiffies + HZ; - - while (test_and_set_bit(bitname, lif->state) && - time_before(jiffies, tlimit)) - usleep_range(100, 200); - - return test_bit(bitname, lif->state); + return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE); } static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 786b158bd305..bc4f951315da 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2189,9 +2189,6 @@ static int rocker_router_fib_event(struct notifier_block *nb, struct rocker_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; - if (!net_eq(info->net, &init_net)) - return NOTIFY_DONE; - if (info->family != AF_INET) return NOTIFY_DONE; @@ -2994,7 +2991,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) * the device, so no need to pass a callback. */ rocker->fib_nb.notifier_call = rocker_router_fib_event; - err = register_fib_notifier(&rocker->fib_nb, NULL); + err = register_fib_notifier(&init_net, &rocker->fib_nb, NULL, NULL); if (err) goto err_register_fib_notifier; @@ -3021,7 +3018,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_register_switchdev_blocking_notifier: unregister_switchdev_notifier(&rocker_switchdev_notifier); err_register_switchdev_notifier: - unregister_fib_notifier(&rocker->fib_nb); + unregister_fib_notifier(&init_net, &rocker->fib_nb); err_register_fib_notifier: rocker_remove_ports(rocker); err_probe_ports: @@ -3057,7 +3054,7 @@ static void rocker_remove(struct pci_dev *pdev) unregister_switchdev_blocking_notifier(nb); unregister_switchdev_notifier(&rocker_switchdev_notifier); - unregister_fib_notifier(&rocker->fib_nb); + unregister_fib_notifier(&init_net, &rocker->fib_nb); rocker_remove_ports(rocker); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); destroy_workqueue(rocker->rocker_owq); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index f9e6744d8fd6..869a498e3b5e 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -252,7 +252,6 @@ #define NETSEC_XDP_CONSUMED BIT(0) #define NETSEC_XDP_TX BIT(1) #define NETSEC_XDP_REDIR BIT(2) -#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR) enum ring_id { NETSEC_RING_TX = 0, @@ -661,6 +660,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) bytes += desc->skb->len; dev_kfree_skb(desc->skb); } else { + bytes += desc->xdpf->len; xdp_return_frame(desc->xdpf); } next: @@ -847,8 +847,8 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv, enum dma_data_direction dma_dir = page_pool_get_dma_dir(rx_ring->page_pool); - dma_handle = page_pool_get_dma_addr(page) + - NETSEC_RXBUF_HEADROOM; + dma_handle = page_pool_get_dma_addr(page) + xdpf->headroom + + sizeof(*xdpf); dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len, dma_dir); tx_desc.buf_type = TYPE_NETSEC_XDP_TX; @@ -858,6 +858,7 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv, tx_desc.addr = xdpf->data; tx_desc.len = xdpf->len; + netdev_sent_queue(priv->ndev, xdpf->len); netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf); return NETSEC_XDP_TX; @@ -1030,7 +1031,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) next: if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) || - xdp_result & NETSEC_XDP_RX_OK) { + xdp_result) { ndev->stats.rx_packets++; ndev->stats.rx_bytes += xdp.data_end - xdp.data; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 79f2ee37afed..cea7a0c7ce68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -130,6 +130,31 @@ static void mt2712_delay_ps2stage(struct mediatek_dwmac_plat_data *plat) } } +static void mt2712_delay_stage2ps(struct mediatek_dwmac_plat_data *plat) +{ + struct mac_delay_struct *mac_delay = &plat->mac_delay; + + switch (plat->phy_mode) { + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_RMII: + /* 550ps per stage for MII/RMII */ + mac_delay->tx_delay *= 550; + mac_delay->rx_delay *= 550; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + /* 170ps per stage for RGMII */ + mac_delay->tx_delay *= 170; + mac_delay->rx_delay *= 170; + break; + default: + dev_err(plat->dev, "phy interface not supported\n"); + break; + } +} + static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) { struct mac_delay_struct *mac_delay = &plat->mac_delay; @@ -199,6 +224,8 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) regmap_write(plat->peri_regmap, PERI_ETH_DLY, delay_val); regmap_write(plat->peri_regmap, PERI_ETH_DLY_FINE, fine_val); + mt2712_delay_stage2ps(plat); + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 3d69da112625..d0356fbd1e43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -130,7 +130,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, writel(mcfilterbits[0], ioaddr + GMAC_HASH_LOW); writel(mcfilterbits[1], ioaddr + GMAC_HASH_HIGH); return; - break; case 7: numhashregs = 4; break; @@ -140,7 +139,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, default: pr_debug("STMMAC: err in setting multicast filter\n"); return; - break; } for (regs = 0; regs < numhashregs; regs++) writel(mcfilterbits[regs], diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 89a3420eba42..07e97f45755d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -43,6 +43,10 @@ #define GMAC_ARP_ADDR 0x00000210 #define GMAC_ADDR_HIGH(reg) (0x300 + reg * 8) #define GMAC_ADDR_LOW(reg) (0x304 + reg * 8) +#define GMAC_L3L4_CTRL(reg) (0x900 + (reg) * 0x30) +#define GMAC_L4_ADDR(reg) (0x904 + (reg) * 0x30) +#define GMAC_L3_ADDR0(reg) (0x910 + (reg) * 0x30) +#define GMAC_L3_ADDR1(reg) (0x914 + (reg) * 0x30) /* RX Queues Routing */ #define GMAC_RXQCTRL_AVCPQ_MASK GENMASK(2, 0) @@ -67,6 +71,7 @@ #define GMAC_PACKET_FILTER_PCF BIT(7) #define GMAC_PACKET_FILTER_HPF BIT(10) #define GMAC_PACKET_FILTER_VTFE BIT(16) +#define GMAC_PACKET_FILTER_IPFE BIT(20) #define GMAC_MAX_PERFECT_ADDRESSES 128 @@ -202,9 +207,11 @@ enum power_event { #define GMAC_HW_FEAT_MIISEL BIT(0) /* MAC HW features1 bitmap */ +#define GMAC_HW_FEAT_L3L4FNUM GENMASK(30, 27) #define GMAC_HW_HASH_TB_SZ GENMASK(25, 24) #define GMAC_HW_FEAT_AVSEL BIT(20) #define GMAC_HW_TSOEN BIT(18) +#define GMAC_HW_ADDR64 GENMASK(15, 14) #define GMAC_HW_TXFIFOSIZE GENMASK(10, 6) #define GMAC_HW_RXFIFOSIZE GENMASK(4, 0) @@ -227,6 +234,21 @@ enum power_event { #define GMAC_HI_DCS_SHIFT 16 #define GMAC_HI_REG_AE BIT(31) +/* L3/L4 Filters regs */ +#define GMAC_L4DPIM0 BIT(21) +#define GMAC_L4DPM0 BIT(20) +#define GMAC_L4SPIM0 BIT(19) +#define GMAC_L4SPM0 BIT(18) +#define GMAC_L4PEN0 BIT(16) +#define GMAC_L3DAIM0 BIT(5) +#define GMAC_L3DAM0 BIT(4) +#define GMAC_L3SAIM0 BIT(3) +#define GMAC_L3SAM0 BIT(2) +#define GMAC_L3PEN0 BIT(0) +#define GMAC_L4DP0 GENMASK(31, 16) +#define GMAC_L4DP0_SHIFT 16 +#define GMAC_L4SP0 GENMASK(15, 0) + /* MTL registers */ #define MTL_OPERATION_MODE 0x00000c00 #define MTL_FRPE BIT(15) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 5a7b0aca1d31..bec929daf703 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -733,7 +733,7 @@ static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable) } static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, - bool is_double) + u16 perfect_match, bool is_double) { void __iomem *ioaddr = hw->pcsr; @@ -748,6 +748,16 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, } writel(value, ioaddr + GMAC_VLAN_TAG); + } else if (perfect_match) { + u32 value = GMAC_VLAN_ETV; + + if (is_double) { + value |= GMAC_VLAN_EDVLP; + value |= GMAC_VLAN_ESVL; + value |= GMAC_VLAN_DOVLTC; + } + + writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG); } else { u32 value = readl(ioaddr + GMAC_VLAN_TAG); @@ -799,6 +809,106 @@ static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + GMAC_CONFIG); } +static int dwmac4_config_l3_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool ipv6, bool sa, bool inv, + u32 match) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + GMAC_PACKET_FILTER); + value |= GMAC_PACKET_FILTER_IPFE; + writel(value, ioaddr + GMAC_PACKET_FILTER); + + value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no)); + + /* For IPv6 not both SA/DA filters can be active */ + if (ipv6) { + value |= GMAC_L3PEN0; + value &= ~(GMAC_L3SAM0 | GMAC_L3SAIM0); + value &= ~(GMAC_L3DAM0 | GMAC_L3DAIM0); + if (sa) { + value |= GMAC_L3SAM0; + if (inv) + value |= GMAC_L3SAIM0; + } else { + value |= GMAC_L3DAM0; + if (inv) + value |= GMAC_L3DAIM0; + } + } else { + value &= ~GMAC_L3PEN0; + if (sa) { + value |= GMAC_L3SAM0; + if (inv) + value |= GMAC_L3SAIM0; + } else { + value |= GMAC_L3DAM0; + if (inv) + value |= GMAC_L3DAIM0; + } + } + + writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + if (sa) { + writel(match, ioaddr + GMAC_L3_ADDR0(filter_no)); + } else { + writel(match, ioaddr + GMAC_L3_ADDR1(filter_no)); + } + + if (!en) + writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + return 0; +} + +static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, + bool en, bool udp, bool sa, bool inv, + u32 match) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + GMAC_PACKET_FILTER); + value |= GMAC_PACKET_FILTER_IPFE; + writel(value, ioaddr + GMAC_PACKET_FILTER); + + value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no)); + if (udp) { + value |= GMAC_L4PEN0; + } else { + value &= ~GMAC_L4PEN0; + } + + value &= ~(GMAC_L4SPM0 | GMAC_L4SPIM0); + value &= ~(GMAC_L4DPM0 | GMAC_L4DPIM0); + if (sa) { + value |= GMAC_L4SPM0; + if (inv) + value |= GMAC_L4SPIM0; + } else { + value |= GMAC_L4DPM0; + if (inv) + value |= GMAC_L4DPIM0; + } + + writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + if (sa) { + value = match & GMAC_L4SP0; + } else { + value = (match << GMAC_L4DP0_SHIFT) & GMAC_L4DP0; + } + + writel(value, ioaddr + GMAC_L4_ADDR(filter_no)); + + if (!en) + writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no)); + + return 0; +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, @@ -828,11 +938,14 @@ const struct stmmac_ops dwmac4_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = dwmac4_set_mac_loopback, .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; const struct stmmac_ops dwmac410_ops = { @@ -869,6 +982,8 @@ const struct stmmac_ops dwmac410_ops = { .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; const struct stmmac_ops dwmac510_ops = { @@ -910,6 +1025,8 @@ const struct stmmac_ops dwmac510_ops = { .sarc_configure = dwmac4_sarc_configure, .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, + .config_l3_filter = dwmac4_config_l3_filter, + .config_l4_filter = dwmac4_config_l4_filter, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 15eb1abba91d..707ab5eba8da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -431,8 +431,8 @@ static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr) static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr) { - p->des0 = cpu_to_le32(addr); - p->des1 = 0; + p->des0 = cpu_to_le32(lower_32_bits(addr)); + p->des1 = cpu_to_le32(upper_32_bits(addr)); } static void dwmac4_clear(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 68c157979b94..b24c89572745 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -79,6 +79,10 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT); writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame)) + writel(upper_32_bits(dma_rx_phy), + ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(chan)); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); } @@ -97,6 +101,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame)) + writel(upper_32_bits(dma_tx_phy), + ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(chan)); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); } @@ -132,6 +140,9 @@ static void dwmac4_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= DMA_SYS_BUS_AAL; + if (dma_cfg->eame) + value |= DMA_SYS_BUS_EAME; + writel(value, ioaddr + DMA_SYS_BUS_MODE); } @@ -353,9 +364,27 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature1 */ hw_cap = readl(ioaddr + GMAC_HW_FEATURE1); + dma_cap->l3l4fnum = (hw_cap & GMAC_HW_FEAT_L3L4FNUM) >> 27; dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24; dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20; dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18; + + dma_cap->addr64 = (hw_cap & GMAC_HW_ADDR64) >> 14; + switch (dma_cap->addr64) { + case 0: + dma_cap->addr64 = 32; + break; + case 1: + dma_cap->addr64 = 40; + break; + case 2: + dma_cap->addr64 = 48; + break; + default: + dma_cap->addr64 = 32; + break; + } + /* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by * shifting and store the sizes in bytes. */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index b66da0237d2a..5299fa1001a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -65,6 +65,7 @@ #define DMA_SYS_BUS_MB BIT(14) #define DMA_AXI_1KBBE BIT(13) #define DMA_SYS_BUS_AAL BIT(12) +#define DMA_SYS_BUS_EAME BIT(11) #define DMA_AXI_BLEN256 BIT(7) #define DMA_AXI_BLEN128 BIT(6) #define DMA_AXI_BLEN64 BIT(5) @@ -91,7 +92,9 @@ #define DMA_CHAN_CONTROL(x) DMA_CHANX_BASE_ADDR(x) #define DMA_CHAN_TX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x4) #define DMA_CHAN_RX_CONTROL(x) (DMA_CHANX_BASE_ADDR(x) + 0x8) +#define DMA_CHAN_TX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x10) #define DMA_CHAN_TX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x14) +#define DMA_CHAN_RX_BASE_ADDR_HI(x) (DMA_CHANX_BASE_ADDR(x) + 0x18) #define DMA_CHAN_RX_BASE_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x1c) #define DMA_CHAN_TX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x20) #define DMA_CHAN_RX_END_ADDR(x) (DMA_CHANX_BASE_ADDR(x) + 0x28) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 5031398e612c..5cda360d5d07 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -555,7 +555,7 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, } static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, - bool is_double) + u16 perfect_match, bool is_double) { void __iomem *ioaddr = hw->pcsr; @@ -576,6 +576,21 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, } writel(value, ioaddr + XGMAC_VLAN_TAG); + } else if (perfect_match) { + u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); + + value |= XGMAC_FILTER_VTFE; + + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + value = XGMAC_VLAN_ETV; + if (is_double) { + value |= XGMAC_VLAN_EDVLP; + value |= XGMAC_VLAN_ESVL; + value |= XGMAC_VLAN_DOVLTC; + } + + writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG); } else { u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 965cbe3e6f51..7cc331996cd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -27,7 +27,10 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= XGMAC_AAL; - writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE); + if (dma_cfg->eame) + value |= XGMAC_EAME; + + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); } static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index ddb851d99618..1303d1e9a18f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -357,7 +357,7 @@ struct stmmac_ops { struct stmmac_rss *cfg, u32 num_rxq); /* VLAN */ void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, - bool is_double); + u16 perfect_match, bool is_double); void (*enable_vlan)(struct mac_device_info *hw, u32 type); /* TX Timestamp */ int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3dfd04e0506a..28705dbe5801 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4207,15 +4207,25 @@ static u32 stmmac_vid_crc32_le(__le16 vid_le) static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) { u32 crc, hash = 0; - u16 vid; + int count = 0; + u16 vid = 0; for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) { __le16 vid_le = cpu_to_le16(vid); crc = bitrev32(~stmmac_vid_crc32_le(vid_le)) >> 28; hash |= (1 << crc); + count++; + } + + if (!priv->dma_cap.vlhash) { + if (count > 2) /* VID = 0 always passes filter */ + return -EOPNOTSUPP; + + vid = cpu_to_le16(vid); + hash = 0; } - return stmmac_update_vlan_hash(priv, priv->hw, hash, is_double); + return stmmac_update_vlan_hash(priv, priv->hw, hash, vid, is_double); } static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) @@ -4224,8 +4234,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid bool is_double = false; int ret; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4244,8 +4252,6 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi struct stmmac_priv *priv = netdev_priv(ndev); bool is_double = false; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4515,6 +4521,13 @@ int stmmac_dvr_probe(struct device *device, if (!ret) { dev_info(priv->device, "Using %d bits DMA width\n", priv->dma_cap.addr64); + + /* + * If more than 32 bits can be addressed, make sure to + * enable enhanced addressing mode. + */ + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)) + priv->plat->dma_cfg->eame = true; } else { ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32)); if (ret) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index e4ac3c401432..0b5db52149bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -823,16 +823,13 @@ out: return 0; } -static int stmmac_test_vlanfilt(struct stmmac_priv *priv) +static int __stmmac_test_vlanfilt(struct stmmac_priv *priv) { struct stmmac_packet_attrs attr = { }; struct stmmac_test_priv *tpriv; struct sk_buff *skb = NULL; int ret = 0, i; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; - tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); if (!tpriv) return -ENOMEM; @@ -898,16 +895,32 @@ cleanup: return ret; } -static int stmmac_test_dvlanfilt(struct stmmac_priv *priv) +static int stmmac_test_vlanfilt(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.vlhash) + return -EOPNOTSUPP; + + return __stmmac_test_vlanfilt(priv); +} + +static int stmmac_test_vlanfilt_perfect(struct stmmac_priv *priv) +{ + int ret, prev_cap = priv->dma_cap.vlhash; + + priv->dma_cap.vlhash = 0; + ret = __stmmac_test_vlanfilt(priv); + priv->dma_cap.vlhash = prev_cap; + + return ret; +} + +static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv) { struct stmmac_packet_attrs attr = { }; struct stmmac_test_priv *tpriv; struct sk_buff *skb = NULL; int ret = 0, i; - if (!priv->dma_cap.vlhash) - return -EOPNOTSUPP; - tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); if (!tpriv) return -ENOMEM; @@ -974,6 +987,25 @@ cleanup: return ret; } +static int stmmac_test_dvlanfilt(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.vlhash) + return -EOPNOTSUPP; + + return __stmmac_test_dvlanfilt(priv); +} + +static int stmmac_test_dvlanfilt_perfect(struct stmmac_priv *priv) +{ + int ret, prev_cap = priv->dma_cap.vlhash; + + priv->dma_cap.vlhash = 0; + ret = __stmmac_test_dvlanfilt(priv); + priv->dma_cap.vlhash = prev_cap; + + return ret; +} + #ifdef CONFIG_NET_CLS_ACT static int stmmac_test_rxp(struct stmmac_priv *priv) { @@ -1648,119 +1680,127 @@ static const struct stmmac_test { int (*fn)(struct stmmac_priv *priv); } stmmac_selftests[] = { { - .name = "MAC Loopback ", + .name = "MAC Loopback ", .lb = STMMAC_LOOPBACK_MAC, .fn = stmmac_test_mac_loopback, }, { - .name = "PHY Loopback ", + .name = "PHY Loopback ", .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */ .fn = stmmac_test_phy_loopback, }, { - .name = "MMC Counters ", + .name = "MMC Counters ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mmc, }, { - .name = "EEE ", + .name = "EEE ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_eee, }, { - .name = "Hash Filter MC ", + .name = "Hash Filter MC ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_hfilt, }, { - .name = "Perfect Filter UC ", + .name = "Perfect Filter UC ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_pfilt, }, { - .name = "MC Filter ", + .name = "MC Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mcfilt, }, { - .name = "UC Filter ", + .name = "UC Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_ucfilt, }, { - .name = "Flow Control ", + .name = "Flow Control ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_flowctrl, }, { - .name = "RSS ", + .name = "RSS ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_rss, }, { - .name = "VLAN Filtering ", + .name = "VLAN Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_vlanfilt, }, { - .name = "Double VLAN Filtering", + .name = "VLAN Filtering (perf) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_vlanfilt_perfect, + }, { + .name = "Double VLAN Filter ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_dvlanfilt, }, { - .name = "Flexible RX Parser ", + .name = "Double VLAN Filter (perf) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_dvlanfilt_perfect, + }, { + .name = "Flexible RX Parser ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_rxp, }, { - .name = "SA Insertion (desc) ", + .name = "SA Insertion (desc) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_desc_sai, }, { - .name = "SA Replacement (desc)", + .name = "SA Replacement (desc) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_desc_sar, }, { - .name = "SA Insertion (reg) ", + .name = "SA Insertion (reg) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_reg_sai, }, { - .name = "SA Replacement (reg)", + .name = "SA Replacement (reg) ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_reg_sar, }, { - .name = "VLAN TX Insertion ", + .name = "VLAN TX Insertion ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_vlanoff, }, { - .name = "SVLAN TX Insertion ", + .name = "SVLAN TX Insertion ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_svlanoff, }, { - .name = "L3 DA Filtering ", + .name = "L3 DA Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l3filt_da, }, { - .name = "L3 SA Filtering ", + .name = "L3 SA Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l3filt_sa, }, { - .name = "L4 DA TCP Filtering ", + .name = "L4 DA TCP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_da_tcp, }, { - .name = "L4 SA TCP Filtering ", + .name = "L4 SA TCP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_sa_tcp, }, { - .name = "L4 DA UDP Filtering ", + .name = "L4 DA UDP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_da_udp, }, { - .name = "L4 SA UDP Filtering ", + .name = "L4 SA UDP Filtering ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_l4filt_sa_udp, }, { - .name = "ARP Offload ", + .name = "ARP Offload ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_arpoffload, }, { - .name = "Jumbo Frame ", + .name = "Jumbo Frame ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_jumbo, }, { - .name = "Multichannel Jumbo ", + .name = "Multichannel Jumbo ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_mjumbo, }, { - .name = "Split Header ", + .name = "Split Header ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_sph, }, diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 887bbba4631e..b0ac557f8e60 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -108,8 +108,8 @@ static void ipvlan_port_destroy(struct net_device *dev) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ - NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ + NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \ + NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) #define IPVLAN_STATE_MASK \ diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 09f1315d2f2a..f4d8f62f28c2 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o fib.o bus.o + netdev.o dev.o fib.o bus.o health.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 1a0ff3d7747b..6aeed0c600f8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -283,6 +283,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->initial_net = current->nsproxy->net_ns; err = device_register(&nsim_bus_dev->dev); if (err) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 56576d4f34a5..468e157a7cb1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -90,6 +90,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->test1); debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, &nsim_dev_take_snapshot_fops); + debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, + &nsim_dev->dont_allow_reload); + debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, + &nsim_dev->fail_reload); return 0; } @@ -123,39 +127,6 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) debugfs_remove_recursive(nsim_dev_port->ddir); } -static struct net *nsim_devlink_net(struct devlink *devlink) -{ - return &init_net; -} - -static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); -} - -static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); -} - -static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); -} - -static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); -} - static int nsim_dev_resources_register(struct devlink *devlink) { struct devlink_resource_size_params params = { @@ -163,9 +134,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; - struct net *net = nsim_devlink_net(devlink); int err; - u64 n; /* Resources for IPv4 */ err = devlink_resource_register(devlink, "IPv4", (u64)-1, @@ -177,8 +146,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); if (err) { @@ -186,8 +154,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); if (err) { @@ -205,8 +172,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); if (err) { @@ -214,8 +180,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); if (err) { @@ -223,22 +188,6 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB, - nsim_dev_ipv4_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES, - nsim_dev_ipv4_fib_rules_res_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB, - nsim_dev_ipv6_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES, - nsim_dev_ipv6_fib_rules_res_occ_get, - net); out: return err; } @@ -524,36 +473,48 @@ static void nsim_dev_traps_exit(struct devlink *devlink) kfree(nsim_dev->trap_data); } -static int nsim_dev_reload_down(struct devlink *devlink, +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack); +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev); + +static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + if (nsim_dev->dont_allow_reload) { + /* For testing purposes, user set debugfs dont_allow_reload + * value to true. So forbid it. + */ + NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + return -EOPNOTSUPP; + } + + nsim_dev_reload_destroy(nsim_dev); return 0; } static int nsim_dev_reload_up(struct devlink *devlink, struct netlink_ext_ack *extack) { - enum nsim_resource_id res_ids[] = { - NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES - }; - struct net *net = nsim_devlink_net(devlink); - int i; - - for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { - int err; - u64 val; + struct nsim_dev *nsim_dev = devlink_priv(devlink); - err = devlink_resource_size_get(devlink, res_ids[i], &val); - if (!err) { - err = nsim_fib_set_max(net, res_ids[i], val, extack); - if (err) - return err; - } + if (nsim_dev->fail_reload) { + /* For testing purposes, user set debugfs fail_reload + * value to true. Fail right away. + */ + NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + return -EINVAL; } - nsim_devlink_param_load_driverinit_values(devlink); - return 0; + return nsim_dev_reload_create(nsim_dev, extack); +} + +static int nsim_dev_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + return devlink_info_driver_name_put(req, DRV_NAME); } #define NSIM_DEV_FLASH_SIZE 500000 @@ -649,6 +610,7 @@ nsim_dev_devlink_trap_action_set(struct devlink *devlink, static const struct devlink_ops nsim_dev_devlink_ops = { .reload_down = nsim_dev_reload_down, .reload_up = nsim_dev_reload_up, + .info_get = nsim_dev_info_get, .flash_update = nsim_dev_flash_update, .trap_init = nsim_dev_devlink_trap_init, .trap_action_set = nsim_dev_devlink_trap_action_set, @@ -657,8 +619,139 @@ static const struct devlink_ops nsim_dev_devlink_ops = { #define NSIM_DEV_MAX_MACS_DEFAULT 32 #define NSIM_DEV_TEST1_DEFAULT true -static struct nsim_dev * -nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, + unsigned int port_index) +{ + struct nsim_dev_port *nsim_dev_port; + struct devlink_port *devlink_port; + int err; + + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); + if (!nsim_dev_port) + return -ENOMEM; + nsim_dev_port->port_index = port_index; + + devlink_port = &nsim_dev_port->devlink_port; + devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_index + 1, 0, 0, + nsim_dev->switch_id.id, + nsim_dev->switch_id.id_len); + err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, + port_index); + if (err) + goto err_port_free; + + err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); + if (err) + goto err_dl_port_unregister; + + nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); + if (IS_ERR(nsim_dev_port->ns)) { + err = PTR_ERR(nsim_dev_port->ns); + goto err_port_debugfs_exit; + } + + devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); + list_add(&nsim_dev_port->list, &nsim_dev->port_list); + + return 0; + +err_port_debugfs_exit: + nsim_dev_port_debugfs_exit(nsim_dev_port); +err_dl_port_unregister: + devlink_port_unregister(devlink_port); +err_port_free: + kfree(nsim_dev_port); + return err; +} + +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +{ + struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; + + list_del(&nsim_dev_port->list); + devlink_port_type_clear(devlink_port); + nsim_destroy(nsim_dev_port->ns); + nsim_dev_port_debugfs_exit(nsim_dev_port); + devlink_port_unregister(devlink_port); + kfree(nsim_dev_port); +} + +static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + + list_for_each_entry_safe(nsim_dev_port, tmp, + &nsim_dev->port_list, list) + __nsim_dev_port_del(nsim_dev_port); +} + +static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, + unsigned int port_count) +{ + int i, err; + + for (i = 0; i < port_count; i++) { + err = __nsim_dev_port_add(nsim_dev, i); + if (err) + goto err_port_del_all; + } + return 0; + +err_port_del_all: + nsim_dev_port_del_all(nsim_dev); + return err; +} + +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + struct devlink *devlink; + int err; + + devlink = priv_to_devlink(nsim_dev); + nsim_dev = devlink_priv(devlink); + INIT_LIST_HEAD(&nsim_dev->port_list); + mutex_init(&nsim_dev->port_list_lock); + nsim_dev->fw_update_status = true; + + nsim_dev->fib_data = nsim_fib_create(devlink, extack); + if (IS_ERR(nsim_dev->fib_data)) + return PTR_ERR(nsim_dev->fib_data); + + nsim_devlink_param_load_driverinit_values(devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_fib_destroy; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_health_init(nsim_dev, devlink); + if (err) + goto err_traps_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_health_exit; + + return 0; + +err_health_exit: + nsim_dev_health_exit(nsim_dev); +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); + return err; +} + +static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; struct devlink *devlink; @@ -667,6 +760,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); if (!devlink) return ERR_PTR(-ENOMEM); + devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); @@ -681,9 +775,15 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_devlink_free; + nsim_dev->fib_data = nsim_fib_create(devlink, NULL); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_resources_unregister; + } + err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) - goto err_resources_unregister; + goto err_fib_destroy; err = devlink_params_register(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); @@ -703,13 +803,25 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_traps_exit; - err = nsim_bpf_dev_init(nsim_dev); + err = nsim_dev_health_init(nsim_dev, devlink); if (err) goto err_debugfs_exit; + err = nsim_bpf_dev_init(nsim_dev); + if (err) + goto err_health_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_bpf_dev_exit; + devlink_params_publish(devlink); return nsim_dev; +err_bpf_dev_exit: + nsim_bpf_dev_exit(nsim_dev); +err_health_exit: + nsim_dev_health_exit(nsim_dev); err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); err_traps_exit: @@ -721,6 +833,8 @@ err_params_unregister: ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); err_resources_unregister: devlink_resources_unregister(devlink, NULL); err_devlink_free: @@ -728,118 +842,51 @@ err_devlink_free: return ERR_PTR(err); } -static void nsim_dev_destroy(struct nsim_dev *nsim_dev) +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) { struct devlink *devlink = priv_to_devlink(nsim_dev); - nsim_bpf_dev_exit(nsim_dev); - nsim_dev_debugfs_exit(nsim_dev); + if (devlink_is_reload_failed(devlink)) + return; + nsim_dev_port_del_all(nsim_dev); + nsim_dev_health_exit(nsim_dev); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); - devlink_params_unregister(devlink, nsim_devlink_params, - ARRAY_SIZE(nsim_devlink_params)); - devlink_unregister(devlink); - devlink_resources_unregister(devlink, NULL); mutex_destroy(&nsim_dev->port_list_lock); - devlink_free(devlink); + nsim_fib_destroy(devlink, nsim_dev->fib_data); } -static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, - unsigned int port_index) -{ - struct nsim_dev_port *nsim_dev_port; - struct devlink_port *devlink_port; - int err; - - nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); - if (!nsim_dev_port) - return -ENOMEM; - nsim_dev_port->port_index = port_index; - - devlink_port = &nsim_dev_port->devlink_port; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_index + 1, 0, 0, - nsim_dev->switch_id.id, - nsim_dev->switch_id.id_len); - err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - port_index); - if (err) - goto err_port_free; - - err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); - if (err) - goto err_dl_port_unregister; - - nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); - if (IS_ERR(nsim_dev_port->ns)) { - err = PTR_ERR(nsim_dev_port->ns); - goto err_port_debugfs_exit; - } - - devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); - list_add(&nsim_dev_port->list, &nsim_dev->port_list); - - return 0; - -err_port_debugfs_exit: - nsim_dev_port_debugfs_exit(nsim_dev_port); -err_dl_port_unregister: - devlink_port_unregister(devlink_port); -err_port_free: - kfree(nsim_dev_port); - return err; -} - -static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +static void nsim_dev_destroy(struct nsim_dev *nsim_dev) { - struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; - - list_del(&nsim_dev_port->list); - devlink_port_type_clear(devlink_port); - nsim_destroy(nsim_dev_port->ns); - nsim_dev_port_debugfs_exit(nsim_dev_port); - devlink_port_unregister(devlink_port); - kfree(nsim_dev_port); -} + struct devlink *devlink = priv_to_devlink(nsim_dev); -static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) -{ - struct nsim_dev_port *nsim_dev_port, *tmp; + nsim_dev_reload_destroy(nsim_dev); - list_for_each_entry_safe(nsim_dev_port, tmp, - &nsim_dev->port_list, list) - __nsim_dev_port_del(nsim_dev_port); + nsim_bpf_dev_exit(nsim_dev); + nsim_dev_debugfs_exit(nsim_dev); + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); + devlink_unregister(devlink); + devlink_resources_unregister(devlink, NULL); + devlink_free(devlink); } int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; - int i; - int err; - nsim_dev = nsim_dev_create(nsim_bus_dev, nsim_bus_dev->port_count); + nsim_dev = nsim_dev_create(nsim_bus_dev); if (IS_ERR(nsim_dev)) return PTR_ERR(nsim_dev); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); - for (i = 0; i < nsim_bus_dev->port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); - if (err) - goto err_port_del_all; - } return 0; - -err_port_del_all: - nsim_dev_port_del_all(nsim_dev); - nsim_dev_destroy(nsim_dev); - return err; } void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); - nsim_dev_port_del_all(nsim_dev); nsim_dev_destroy(nsim_dev); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 1a251f76d09b..13540dee7364 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -18,7 +18,7 @@ #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/fib_rules.h> -#include <net/netns/generic.h> +#include <net/net_namespace.h> #include "netdevsim.h" @@ -33,15 +33,14 @@ struct nsim_per_fib_data { }; struct nsim_fib_data { + struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; }; -static unsigned int nsim_fib_net_id; - -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; switch (res_id) { @@ -64,12 +63,10 @@ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) return max ? entry->max : entry->num; } -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack) +static void nsim_fib_set_max(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, u64 val) { - struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; - int err = 0; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: @@ -85,20 +82,10 @@ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, entry = &fib_data->ipv6.rules; break; default: - return 0; - } - - /* not allowing a new max to be less than curren occupancy - * --> no means of evicting entries - */ - if (val < entry->num) { - NL_SET_ERR_MSG_MOD(extack, "New size is less than current occupancy"); - err = -EINVAL; - } else { - entry->max = val; + WARN_ON(1); + return; } - - return err; + entry->max = val; } static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, @@ -120,9 +107,9 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) +static int nsim_fib_rule_event(struct nsim_fib_data *data, + struct fib_notifier_info *info, bool add) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -157,9 +144,9 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_event(struct fib_notifier_info *info, bool add) +static int nsim_fib_event(struct nsim_fib_data *data, + struct fib_notifier_info *info, bool add) { - struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -178,18 +165,22 @@ static int nsim_fib_event(struct fib_notifier_info *info, bool add) static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); struct fib_notifier_info *info = ptr; int err = 0; switch (event) { case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD); + err = nsim_fib_rule_event(data, info, + event == FIB_EVENT_RULE_ADD); break; case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(data, info, + event == FIB_EVENT_ENTRY_ADD); break; } @@ -199,69 +190,116 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, /* inconsistent dump, trying again */ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) { - struct nsim_fib_data *data; - struct net *net; + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + fib_nb); - rcu_read_lock(); - for_each_net_rcu(net) { - data = net_generic(net, nsim_fib_net_id); + data->ipv4.fib.num = 0ULL; + data->ipv4.rules.num = 0ULL; + data->ipv6.fib.num = 0ULL; + data->ipv6.rules.num = 0ULL; +} - data->ipv4.fib.num = 0ULL; - data->ipv4.rules.num = 0ULL; +static u64 nsim_fib_ipv4_resource_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; - data->ipv6.fib.num = 0ULL; - data->ipv6.rules.num = 0ULL; - } - rcu_read_unlock(); + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB, false); } -static struct notifier_block nsim_fib_nb = { - .notifier_call = nsim_fib_event_nb, -}; - -/* Initialize per network namespace state */ -static int __net_init nsim_fib_netns_init(struct net *net) +static u64 nsim_fib_ipv4_rules_res_occ_get(void *priv) { - struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id); + struct nsim_fib_data *data = priv; - data->ipv4.fib.max = (u64)-1; - data->ipv4.rules.max = (u64)-1; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB_RULES, false); +} - data->ipv6.fib.max = (u64)-1; - data->ipv6.rules.max = (u64)-1; +static u64 nsim_fib_ipv6_resource_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; - return 0; + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB, false); } -static struct pernet_operations nsim_fib_net_ops = { - .init = nsim_fib_netns_init, - .id = &nsim_fib_net_id, - .size = sizeof(struct nsim_fib_data), -}; +static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); +} -void nsim_fib_exit(void) +static void nsim_fib_set_max_all(struct nsim_fib_data *data, + struct devlink *devlink) { - unregister_fib_notifier(&nsim_fib_nb); - unregister_pernet_subsys(&nsim_fib_net_ops); + enum nsim_resource_id res_ids[] = { + NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + }; + int i; + + for (i = 0; i < ARRAY_SIZE(res_ids); i++) { + int err; + u64 val; + + err = devlink_resource_size_get(devlink, res_ids[i], &val); + if (err) + val = (u64) -1; + nsim_fib_set_max(data, res_ids[i], val); + } } -int nsim_fib_init(void) +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack) { + struct nsim_fib_data *data; int err; - err = register_pernet_subsys(&nsim_fib_net_ops); - if (err < 0) { - pr_err("Failed to register pernet subsystem\n"); - goto err_out; - } + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + nsim_fib_set_max_all(data, devlink); - err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); - if (err < 0) { + data->fib_nb.notifier_call = nsim_fib_event_nb; + err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, + nsim_fib_dump_inconsistent, extack); + if (err) { pr_err("Failed to register fib notifier\n"); - unregister_pernet_subsys(&nsim_fib_net_ops); goto err_out; } + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB, + nsim_fib_ipv4_resource_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES, + nsim_fib_ipv4_rules_res_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB, + nsim_fib_ipv6_resource_occ_get, + data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES, + nsim_fib_ipv6_rules_res_occ_get, + data); + return data; + err_out: - return err; + kfree(data); + return ERR_PTR(err); +} + +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) +{ + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES); + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB); + unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + kfree(data); } diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c new file mode 100644 index 000000000000..2716235a0336 --- /dev/null +++ b/drivers/net/netdevsim/health.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "netdevsim.h" + +static int +nsim_dev_empty_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_dev_empty_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static const +struct devlink_health_reporter_ops nsim_dev_empty_reporter_ops = { + .name = "empty", + .dump = nsim_dev_empty_reporter_dump, + .diagnose = nsim_dev_empty_reporter_diagnose, +}; + +struct nsim_dev_dummy_reporter_ctx { + char *break_msg; +}; + +static int +nsim_dev_dummy_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + + if (health->fail_recover) { + /* For testing purposes, user set debugfs fail_recover + * value to true. Fail right away. + */ + NL_SET_ERR_MSG_MOD(extack, "User setup the recover to fail for testing purposes"); + return -EINVAL; + } + if (ctx) { + kfree(health->recovered_break_msg); + health->recovered_break_msg = kstrdup(ctx->break_msg, + GFP_KERNEL); + if (!health->recovered_break_msg) + return -ENOMEM; + } + return 0; +} + +static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len) +{ + char *binary; + int err; + int i; + + err = devlink_fmsg_bool_pair_put(fmsg, "test_bool", true); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "test_u8", 1); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "test_u32", 3); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "test_u64", 4); + if (err) + return err; + err = devlink_fmsg_string_pair_put(fmsg, "test_string", "somestring"); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_binary"); + if (err) + return err; + binary = kmalloc(binary_len, GFP_KERNEL); + if (!binary) + return -ENOMEM; + get_random_bytes(binary, binary_len); + err = devlink_fmsg_binary_put(fmsg, binary, binary_len); + kfree(binary); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "test_nest"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, "nested_test_bool", false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "nested_test_u8", false); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_bool_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_bool_put(fmsg, true); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u8_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u8_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u32_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u32_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u64_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u64_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_array_of_objects"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, + "in_array_nested_test_bool", + false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, + "in_array_nested_test_u8", + i); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int +nsim_dev_dummy_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + int err; + + if (ctx) { + err = devlink_fmsg_string_pair_put(fmsg, "break_message", + ctx->break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static int +nsim_dev_dummy_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + int err; + + if (health->recovered_break_msg) { + err = devlink_fmsg_string_pair_put(fmsg, + "recovered_break_message", + health->recovered_break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static const +struct devlink_health_reporter_ops nsim_dev_dummy_reporter_ops = { + .name = "dummy", + .recover = nsim_dev_dummy_reporter_recover, + .dump = nsim_dev_dummy_reporter_dump, + .diagnose = nsim_dev_dummy_reporter_diagnose, +}; + +static ssize_t nsim_dev_health_break_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_health *health = file->private_data; + struct nsim_dev_dummy_reporter_ctx ctx; + char *break_msg; + int err; + + break_msg = kmalloc(count + 1, GFP_KERNEL); + if (!break_msg) + return -ENOMEM; + + if (copy_from_user(break_msg, data, count)) { + err = -EFAULT; + goto out; + } + break_msg[count] = '\0'; + if (break_msg[count - 1] == '\n') + break_msg[count - 1] = '\0'; + + ctx.break_msg = break_msg; + err = devlink_health_report(health->dummy_reporter, break_msg, &ctx); + if (err) + goto out; + +out: + kfree(break_msg); + return err ?: count; +} + +static const struct file_operations nsim_dev_health_break_fops = { + .open = simple_open, + .write = nsim_dev_health_break_write, + .llseek = generic_file_llseek, +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) +{ + struct nsim_dev_health *health = &nsim_dev->health; + int err; + + health->empty_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_empty_reporter_ops, + 0, false, health); + if (IS_ERR(health->empty_reporter)) + return PTR_ERR(health->empty_reporter); + + health->dummy_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_dummy_reporter_ops, + 0, false, health); + if (IS_ERR(health->dummy_reporter)) { + err = PTR_ERR(health->dummy_reporter); + goto err_empty_reporter_destroy; + } + + health->ddir = debugfs_create_dir("health", nsim_dev->ddir); + if (IS_ERR_OR_NULL(health->ddir)) { + err = PTR_ERR_OR_ZERO(health->ddir) ?: -EINVAL; + goto err_dummy_reporter_destroy; + } + + health->recovered_break_msg = NULL; + debugfs_create_file("break_health", 0200, health->ddir, health, + &nsim_dev_health_break_fops); + health->binary_len = 16; + debugfs_create_u32("binary_len", 0600, health->ddir, + &health->binary_len); + health->fail_recover = false; + debugfs_create_bool("fail_recover", 0600, health->ddir, + &health->fail_recover); + return 0; + +err_dummy_reporter_destroy: + devlink_health_reporter_destroy(health->dummy_reporter); +err_empty_reporter_destroy: + devlink_health_reporter_destroy(health->empty_reporter); + return err; +} + +void nsim_dev_health_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_health *health = &nsim_dev->health; + + debugfs_remove_recursive(health->ddir); + kfree(health->recovered_break_msg); + devlink_health_reporter_destroy(health->dummy_reporter); + devlink_health_reporter_destroy(health->empty_reporter); +} diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 55f57f76d01b..2908e0a0d6e1 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -290,6 +290,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) if (!dev) return ERR_PTR(-ENOMEM); + dev_net_set(dev, nsim_dev_net(nsim_dev)); ns = netdev_priv(dev); ns->netdev = dev; ns->nsim_dev = nsim_dev; @@ -357,18 +358,12 @@ static int __init nsim_module_init(void) if (err) goto err_dev_exit; - err = nsim_fib_init(); - if (err) - goto err_bus_exit; - err = rtnl_link_register(&nsim_link_ops); if (err) - goto err_fib_exit; + goto err_bus_exit; return 0; -err_fib_exit: - nsim_fib_exit(); err_bus_exit: nsim_bus_exit(); err_dev_exit: @@ -379,7 +374,6 @@ err_dev_exit: static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); - nsim_fib_exit(); nsim_bus_exit(); nsim_dev_exit(); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 66bf13765ad0..94df795ef4d3 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -134,6 +134,18 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6_FIB_RULES, }; +struct nsim_dev_health { + struct devlink_health_reporter *empty_reporter; + struct devlink_health_reporter *dummy_reporter; + struct dentry *ddir; + char *recovered_break_msg; + u32 binary_len; + bool fail_recover; +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); +void nsim_dev_health_exit(struct nsim_dev *nsim_dev); + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; @@ -161,9 +173,17 @@ struct nsim_dev { bool fw_update_status; u32 max_macs; bool test1; + bool dont_allow_reload; + bool fail_reload; struct devlink_region *dummy_region; + struct nsim_dev_health health; }; +static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) +{ + return devlink_net(priv_to_devlink(nsim_dev)); +} + int nsim_dev_init(void); void nsim_dev_exit(void); int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev); @@ -173,11 +193,11 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); -int nsim_fib_init(void); -void nsim_fib_exit(void); -u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack); +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack); +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); +u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, bool max); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); @@ -215,6 +235,9 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + struct net *initial_net; /* Purpose of this is to carry net pointer + * during the probe time only. + */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 1eb5d4fb8925..8e30db28fd7d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -62,6 +62,7 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) +#define ATH9331_PHY_ID 0x004dd041 #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 @@ -71,10 +72,6 @@ MODULE_DESCRIPTION("Atheros 803x PHY driver"); MODULE_AUTHOR("Matus Ujhelyi"); MODULE_LICENSE("GPL"); -struct at803x_priv { - bool phy_reset:1; -}; - struct at803x_context { u16 bmcr; u16 advertise; @@ -240,20 +237,6 @@ static int at803x_resume(struct phy_device *phydev) return phy_modify(phydev, MII_BMCR, BMCR_PDOWN | BMCR_ISOLATE, 0); } -static int at803x_probe(struct phy_device *phydev) -{ - struct device *dev = &phydev->mdio.dev; - struct at803x_priv *priv; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - phydev->priv = priv; - - return 0; -} - static int at803x_config_init(struct phy_device *phydev) { int ret; @@ -430,7 +413,6 @@ static struct phy_driver at803x_driver[] = { .phy_id = ATH8035_PHY_ID, .name = "Atheros 8035 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -445,7 +427,6 @@ static struct phy_driver at803x_driver[] = { .phy_id = ATH8030_PHY_ID, .name = "Atheros 8030 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, @@ -460,7 +441,6 @@ static struct phy_driver at803x_driver[] = { .phy_id = ATH8031_PHY_ID, .name = "Atheros 8031 ethernet", .phy_id_mask = AT803X_PHY_ID_MASK, - .probe = at803x_probe, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -471,6 +451,16 @@ static struct phy_driver at803x_driver[] = { .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, +}, { + /* ATHEROS AR9331 */ + PHY_ID_MATCH_EXACT(ATH9331_PHY_ID), + .name = "Atheros AR9331 built-in PHY", + .config_init = at803x_config_init, + .suspend = at803x_suspend, + .resume = at803x_resume, + /* PHY_BASIC_FEATURES */ + .ack_interrupt = &at803x_ack_interrupt, + .config_intr = &at803x_config_intr, } }; module_phy_driver(at803x_driver); @@ -479,6 +469,7 @@ static struct mdio_device_id __maybe_unused atheros_tbl[] = { { ATH8030_PHY_ID, AT803X_PHY_ID_MASK }, { ATH8031_PHY_ID, AT803X_PHY_ID_MASK }, { ATH8035_PHY_ID, AT803X_PHY_ID_MASK }, + { PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) }, { } }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 937d0059e8ac..4313c74b4fd8 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -26,18 +26,13 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); +static int bcm54xx_config_clock_delay(struct phy_device *phydev); + static int bcm54210e_config_init(struct phy_device *phydev) { int val; - val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - val |= MII_BCM54XX_AUXCTL_MISC_WREN; - bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val); - - val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); - val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; - bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + bcm54xx_config_clock_delay(phydev); if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) { val = phy_read(phydev, MII_CTRL1000); @@ -52,26 +47,7 @@ static int bcm54612e_config_init(struct phy_device *phydev) { int reg; - /* Clear TX internal delay unless requested. */ - if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && - (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) { - /* Disable TXD to GTXCLK clock delay (default set) */ - /* Bit 9 is the only field in shadow register 00011 */ - bcm_phy_write_shadow(phydev, 0x03, 0); - } - - /* Clear RX internal delay unless requested. */ - if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && - (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) { - reg = bcm54xx_auxctl_read(phydev, - MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - /* Disable RXD to RXC delay (default set) */ - reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - /* Clear shadow selector field */ - reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK; - bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, - MII_BCM54XX_AUXCTL_MISC_WREN | reg); - } + bcm54xx_config_clock_delay(phydev); /* Enable CLK125 MUX on LED4 if ref clock is enabled. */ if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) { diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 7ada1fd9ca71..805cda3465d7 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -895,7 +895,7 @@ static void vsc85xx_tr_write(struct phy_device *phydev, u16 addr, u32 val) static int vsc8531_pre_init_seq_set(struct phy_device *phydev) { int rc; - const struct reg_val init_seq[] = { + static const struct reg_val init_seq[] = { {0x0f90, 0x00688980}, {0x0696, 0x00000003}, {0x07fa, 0x0050100f}, @@ -939,7 +939,7 @@ out_unlock: static int vsc85xx_eee_init_seq_set(struct phy_device *phydev) { - const struct reg_val init_eee[] = { + static const struct reg_val init_eee[] = { {0x0f82, 0x0012b00a}, {0x1686, 0x00000004}, {0x168c, 0x00d2c46f}, @@ -1224,7 +1224,7 @@ out: /* bus->mdio_lock should be locked when using this function */ static int vsc8574_config_pre_init(struct phy_device *phydev) { - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x0fae, 0x000401bd}, {0x0fac, 0x000f000f}, {0x17a0, 0x00a0f147}, @@ -1272,7 +1272,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev) {0x0fee, 0x0004a6a1}, {0x0ffe, 0x00b01807}, }; - const struct reg_val pre_init2[] = { + static const struct reg_val pre_init2[] = { {0x0486, 0x0008a518}, {0x0488, 0x006dc696}, {0x048a, 0x00000912}, @@ -1427,7 +1427,7 @@ out: /* bus->mdio_lock should be locked when using this function */ static int vsc8584_config_pre_init(struct phy_device *phydev) { - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x07fa, 0x0050100f}, {0x1688, 0x00049f81}, {0x0f90, 0x00688980}, @@ -1451,7 +1451,7 @@ static int vsc8584_config_pre_init(struct phy_device *phydev) {0x16b2, 0x00007000}, {0x16b4, 0x00000814}, }; - const struct reg_val pre_init2[] = { + static const struct reg_val pre_init2[] = { {0x0486, 0x0008a518}, {0x0488, 0x006dc696}, {0x048a, 0x00000912}, @@ -1786,7 +1786,7 @@ static int vsc8514_config_pre_init(struct phy_device *phydev) * values to handle hardware performance of PHY. They * are set at Power-On state and remain until PHY Reset. */ - const struct reg_val pre_init1[] = { + static const struct reg_val pre_init1[] = { {0x0f90, 0x00688980}, {0x0786, 0x00000003}, {0x07fa, 0x0050100f}, diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 9412669b579c..5458ed1b87a8 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -8,7 +8,7 @@ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 69, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 74, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -42,6 +42,8 @@ const char *phy_speed_to_str(int speed) return "100Gbps"; case SPEED_200000: return "200Gbps"; + case SPEED_400000: + return "400Gbps"; case SPEED_UNKNOWN: return "Unknown"; default: @@ -70,6 +72,12 @@ EXPORT_SYMBOL_GPL(phy_duplex_to_str); .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT} static const struct phy_setting settings[] = { + /* 400G */ + PHY_SETTING( 400000, FULL, 400000baseCR8_Full ), + PHY_SETTING( 400000, FULL, 400000baseKR8_Full ), + PHY_SETTING( 400000, FULL, 400000baseLR8_ER8_FR8_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR8_Full ), + PHY_SETTING( 400000, FULL, 400000baseSR8_Full ), /* 200G */ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ), PHY_SETTING( 200000, FULL, 200000baseKR4_Full ), @@ -689,11 +697,17 @@ EXPORT_SYMBOL_GPL(phy_modify_mmd); static int __phy_read_page(struct phy_device *phydev) { + if (WARN_ONCE(!phydev->drv->read_page, "read_page callback not available, PHY driver not loaded?\n")) + return -EOPNOTSUPP; + return phydev->drv->read_page(phydev); } static int __phy_write_page(struct phy_device *phydev, int page) { + if (WARN_ONCE(!phydev->drv->write_page, "write_page callback not available, PHY driver not loaded?\n")) + return -EOPNOTSUPP; + return phydev->drv->write_page(phydev, page); } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 20e2ebe458f2..be7a2c0fa59b 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -117,9 +117,7 @@ static int phylink_is_empty_linkmode(const unsigned long *linkmode) phylink_set(tmp, Pause); phylink_set(tmp, Asym_Pause); - bitmap_andnot(tmp, linkmode, tmp, __ETHTOOL_LINK_MODE_MASK_NBITS); - - return linkmode_empty(tmp); + return linkmode_subset(linkmode, tmp); } static const char *phylink_an_mode_str(unsigned int mode) @@ -550,26 +548,17 @@ static const struct sfp_upstream_ops sfp_phylink_ops; static int phylink_register_sfp(struct phylink *pl, struct fwnode_handle *fwnode) { - struct fwnode_reference_args ref; + struct sfp_bus *bus; int ret; - if (!fwnode) - return 0; - - ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL, - 0, 0, &ref); - if (ret < 0) { - if (ret == -ENOENT) - return 0; - - phylink_err(pl, "unable to parse \"sfp\" node: %d\n", - ret); + bus = sfp_register_upstream_node(fwnode, pl, &sfp_phylink_ops); + if (IS_ERR(bus)) { + ret = PTR_ERR(bus); + phylink_err(pl, "unable to attach SFP bus: %d\n", ret); return ret; } - pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl, &sfp_phylink_ops); - if (!pl->sfp_bus) - return -ENOMEM; + pl->sfp_bus = bus; return 0; } @@ -1728,8 +1717,7 @@ static int phylink_sfp_module_insert(void *upstream, if (phy_interface_mode_is_8023z(iface) && pl->phydev) return -EINVAL; - changed = !bitmap_equal(pl->supported, support, - __ETHTOOL_LINK_MODE_MASK_NBITS); + changed = !linkmode_equal(pl->supported, support); if (changed) { linkmode_copy(pl->supported, support); linkmode_copy(pl->link_config.advertising, config.advertising); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index b23fc41896ef..d037aab6a71d 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -4,6 +4,7 @@ #include <linux/list.h> #include <linux/mutex.h> #include <linux/phylink.h> +#include <linux/property.h> #include <linux/rtnetlink.h> #include <linux/slab.h> @@ -445,45 +446,63 @@ static void sfp_upstream_clear(struct sfp_bus *bus) } /** - * sfp_register_upstream() - Register the neighbouring device - * @fwnode: firmware node for the SFP bus + * sfp_register_upstream_node() - parse and register the neighbouring device + * @fwnode: firmware node for the parent device (MAC or PHY) * @upstream: the upstream private data * @ops: the upstream's &struct sfp_upstream_ops * - * Register the upstream device (eg, PHY) with the SFP bus. MAC drivers - * should use phylink, which will call this function for them. Returns - * a pointer to the allocated &struct sfp_bus. + * Parse the parent device's firmware node for a SFP bus, and register the + * SFP bus using sfp_register_upstream(). * - * On error, returns %NULL. + * Returns: on success, a pointer to the sfp_bus structure, + * %NULL if no SFP is specified, + * on failure, an error pointer value: + * corresponding to the errors detailed for + * fwnode_property_get_reference_args(). + * %-ENOMEM if we failed to allocate the bus. + * an error from the upstream's connect_phy() method. */ -struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, - void *upstream, - const struct sfp_upstream_ops *ops) +struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, + void *upstream, + const struct sfp_upstream_ops *ops) { - struct sfp_bus *bus = sfp_bus_get(fwnode); - int ret = 0; + struct fwnode_reference_args ref; + struct sfp_bus *bus; + int ret; - if (bus) { - rtnl_lock(); - bus->upstream_ops = ops; - bus->upstream = upstream; + ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL, + 0, 0, &ref); + if (ret == -ENOENT) + return NULL; + else if (ret < 0) + return ERR_PTR(ret); - if (bus->sfp) { - ret = sfp_register_bus(bus); - if (ret) - sfp_upstream_clear(bus); - } - rtnl_unlock(); + bus = sfp_bus_get(ref.fwnode); + fwnode_handle_put(ref.fwnode); + if (!bus) + return ERR_PTR(-ENOMEM); + + rtnl_lock(); + bus->upstream_ops = ops; + bus->upstream = upstream; + + if (bus->sfp) { + ret = sfp_register_bus(bus); + if (ret) + sfp_upstream_clear(bus); + } else { + ret = 0; } + rtnl_unlock(); if (ret) { sfp_bus_put(bus); - bus = NULL; + bus = ERR_PTR(ret); } return bus; } -EXPORT_SYMBOL_GPL(sfp_register_upstream); +EXPORT_SYMBOL_GPL(sfp_register_upstream_node); /** * sfp_unregister_upstream() - Unregister sfp bus diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8089def5a46..cb1d5fe60c31 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2066,7 +2066,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev, cmd->base.duplex = DUPLEX_UNKNOWN; cmd->base.port = PORT_OTHER; - list_for_each_entry(port, &team->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { if (team_port_txable(port)) { if (port->state.speed != SPEED_UNKNOWN) speed += port->state.speed; @@ -2075,6 +2076,8 @@ static int team_ethtool_get_link_ksettings(struct net_device *dev, cmd->base.duplex = port->state.duplex; } } + rcu_read_unlock(); + cmd->base.speed = speed ? : SPEED_UNKNOWN; return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a8d3141582a5..dab6cccfeb52 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2290,7 +2290,13 @@ static void tun_free_netdev(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); BUG_ON(!(list_empty(&tun->disabled))); + free_percpu(tun->pcpu_stats); + /* We clear pcpu_stats so that tun_set_iff() can tell if + * tun_free_netdev() has been called from register_netdevice(). + */ + tun->pcpu_stats = NULL; + tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); @@ -2782,9 +2788,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; - err = dev_get_valid_name(net, dev, name); - if (err < 0) - goto err_free_dev; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; @@ -2859,8 +2862,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err_detach: tun_detach_all(dev); - /* register_netdevice() already called tun_free_netdev() */ - goto err_free_dev; + /* We are here because register_netdevice() has failed. + * If register_netdevice() already called tun_free_netdev() + * while dealing with the error, tun->pcpu_stats has been cleared. + */ + if (!tun->pcpu_stats) + goto err_free_dev; err_free_flow: tun_flow_uninit(tun); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index daa54486ab09..c5a6e75c24e3 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1214,6 +1214,32 @@ static int ax88179_led_setting(struct usbnet *dev) return 0; } +static void ax88179_get_mac_addr(struct usbnet *dev) +{ + u8 mac[ETH_ALEN]; + + /* Maybe the boot loader passed the MAC address via device tree */ + if (!eth_platform_get_mac_address(&dev->udev->dev, mac)) { + netif_dbg(dev, ifup, dev->net, + "MAC address read from device tree"); + } else { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, + ETH_ALEN, mac); + netif_dbg(dev, ifup, dev->net, + "MAC address read from ASIX chip"); + } + + if (is_valid_ether_addr(mac)) { + memcpy(dev->net->dev_addr, mac, ETH_ALEN); + } else { + netdev_info(dev->net, "invalid MAC address, using random\n"); + eth_hw_addr_random(dev->net); + } + + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, + dev->net->dev_addr); +} + static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) { u8 buf[5]; @@ -1240,8 +1266,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); msleep(100); - ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, - ETH_ALEN, dev->net->dev_addr); + /* Read MAC address from DTB or asix chip */ + ax88179_get_mac_addr(dev); memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); /* RX bulk configuration */ @@ -1541,8 +1567,8 @@ static int ax88179_reset(struct usbnet *dev) /* Ethernet PHY Auto Detach*/ ax88179_auto_detach(dev, 0); - ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, - dev->net->dev_addr); + /* Read MAC address from DTB or asix chip */ + ax88179_get_mac_addr(dev); /* RX bulk configuration */ memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index cee9fef925cd..55d0bcb00aef 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -24,9 +24,11 @@ #include <linux/suspend.h> #include <linux/atomic.h> #include <linux/acpi.h> +#include <linux/firmware.h> +#include <crypto/hash.h> /* Information for net-next */ -#define NETNEXT_VERSION "10" +#define NETNEXT_VERSION "11" /* Information for net */ #define NET_VERSION "10" @@ -54,8 +56,11 @@ #define PLA_BDC_CR 0xd1a0 #define PLA_TEREDO_TIMER 0xd2cc #define PLA_REALWOW_TIMER 0xd2e8 +#define PLA_UPHY_TIMER 0xd388 #define PLA_SUSPEND_FLAG 0xd38a #define PLA_INDICATE_FALG 0xd38c +#define PLA_MACDBG_PRE 0xd38c /* RTL_VER_04 only */ +#define PLA_MACDBG_POST 0xd38e /* RTL_VER_04 only */ #define PLA_EXTRA_STATUS 0xd398 #define PLA_EFUSE_DATA 0xdd00 #define PLA_EFUSE_CMD 0xdd02 @@ -110,7 +115,12 @@ #define USB_CONNECT_TIMER 0xcbf8 #define USB_MSC_TIMER 0xcbfc #define USB_BURST_SIZE 0xcfc0 +#define USB_FW_FIX_EN0 0xcfca +#define USB_FW_FIX_EN1 0xcfcc #define USB_LPM_CONFIG 0xcfd8 +#define USB_CSTMR 0xcfef /* RTL8153A */ +#define USB_FW_CTRL 0xd334 /* RTL8153B */ +#define USB_FC_TIMER 0xd340 #define USB_USB_CTRL 0xd406 #define USB_PHY_CTRL 0xd408 #define USB_TX_AGG 0xd40a @@ -126,6 +136,7 @@ #define USB_LPM_CTRL 0xd41a #define USB_BMU_RESET 0xd4b0 #define USB_U1U2_TIMER 0xd4da +#define USB_FW_TASK 0xd4e8 /* RTL8153B */ #define USB_UPS_CTRL 0xd800 #define USB_POWER_CUT 0xd80a #define USB_MISC_0 0xd81a @@ -133,18 +144,19 @@ #define USB_AFE_CTRL2 0xd824 #define USB_UPS_CFG 0xd842 #define USB_UPS_FLAGS 0xd848 +#define USB_WDT1_CTRL 0xe404 #define USB_WDT11_CTRL 0xe43c -#define USB_BP_BA 0xfc26 -#define USB_BP_0 0xfc28 -#define USB_BP_1 0xfc2a -#define USB_BP_2 0xfc2c -#define USB_BP_3 0xfc2e -#define USB_BP_4 0xfc30 -#define USB_BP_5 0xfc32 -#define USB_BP_6 0xfc34 -#define USB_BP_7 0xfc36 -#define USB_BP_EN 0xfc38 -#define USB_BP_8 0xfc38 +#define USB_BP_BA PLA_BP_BA +#define USB_BP_0 PLA_BP_0 +#define USB_BP_1 PLA_BP_1 +#define USB_BP_2 PLA_BP_2 +#define USB_BP_3 PLA_BP_3 +#define USB_BP_4 PLA_BP_4 +#define USB_BP_5 PLA_BP_5 +#define USB_BP_6 PLA_BP_6 +#define USB_BP_7 PLA_BP_7 +#define USB_BP_EN PLA_BP_EN /* RTL8153A */ +#define USB_BP_8 0xfc38 /* RTL8153B */ #define USB_BP_9 0xfc3a #define USB_BP_10 0xfc3c #define USB_BP_11 0xfc3e @@ -346,7 +358,12 @@ /* PLA_INDICATE_FALG */ #define UPCOMING_RUNTIME_D3 BIT(0) +/* PLA_MACDBG_PRE and PLA_MACDBG_POST */ +#define DEBUG_OE BIT(0) +#define DEBUG_LTSSM 0x0082 + /* PLA_EXTRA_STATUS */ +#define U3P3_CHECK_EN BIT(7) /* RTL_VER_05 only */ #define LINK_CHANGE_FLAG BIT(8) /* USB_USB2PHY */ @@ -368,6 +385,12 @@ #define STAT_SPEED_HIGH 0x0000 #define STAT_SPEED_FULL 0x0002 +/* USB_FW_FIX_EN0 */ +#define FW_FIX_SUSPEND BIT(14) + +/* USB_FW_FIX_EN1 */ +#define FW_IP_RESET_EN BIT(9) + /* USB_LPM_CONFIG */ #define LPM_U1U2_EN BIT(0) @@ -392,12 +415,24 @@ #define OWN_UPDATE BIT(0) #define OWN_CLEAR BIT(1) +/* USB_FW_TASK */ +#define FC_PATCH_TASK BIT(1) + /* USB_UPS_CTRL */ #define POWER_CUT 0x0100 /* USB_PM_CTRL_STATUS */ #define RESUME_INDICATE 0x0001 +/* USB_CSTMR */ +#define FORCE_SUPER BIT(0) + +/* USB_FW_CTRL */ +#define FLOW_CTRL_PATCH_OPT BIT(1) + +/* USB_FC_TIMER */ +#define CTRL_TIMER_EN BIT(15) + /* USB_USB_CTRL */ #define RX_AGG_DISABLE 0x0010 #define RX_ZERO_EN 0x0080 @@ -419,6 +454,9 @@ #define COALESCE_HIGH 250000U #define COALESCE_SLOW 524280U +/* USB_WDT1_CTRL */ +#define WTD1_EN BIT(0) + /* USB_WDT11_CTRL */ #define TIMER11_EN 0x0001 @@ -570,6 +608,8 @@ enum spd_duplex { #define EFUSE 0xcfdb #define PASS_THRU_MASK 0x1 +#define BP4_SUPER_ONLY 0x1578 /* RTL_VER_04 only */ + enum rtl_register_content { _1000bps = 0x10, _100bps = 0x08, @@ -736,16 +776,16 @@ struct r8152 { struct tasklet_struct tx_tl; struct rtl_ops { - void (*init)(struct r8152 *); - int (*enable)(struct r8152 *); - void (*disable)(struct r8152 *); - void (*up)(struct r8152 *); - void (*down)(struct r8152 *); - void (*unload)(struct r8152 *); - int (*eee_get)(struct r8152 *, struct ethtool_eee *); - int (*eee_set)(struct r8152 *, struct ethtool_eee *); - bool (*in_nway)(struct r8152 *); - void (*hw_phy_cfg)(struct r8152 *); + void (*init)(struct r8152 *tp); + int (*enable)(struct r8152 *tp); + void (*disable)(struct r8152 *tp); + void (*up)(struct r8152 *tp); + void (*down)(struct r8152 *tp); + void (*unload)(struct r8152 *tp); + int (*eee_get)(struct r8152 *tp, struct ethtool_eee *eee); + int (*eee_set)(struct r8152 *tp, struct ethtool_eee *eee); + bool (*in_nway)(struct r8152 *tp); + void (*hw_phy_cfg)(struct r8152 *tp); void (*autosuspend_en)(struct r8152 *tp, bool enable); } rtl_ops; @@ -766,6 +806,19 @@ struct r8152 { u32 ctap_short_off:1; } ups_info; +#define RTL_VER_SIZE 32 + + struct rtl_fw { + const char *fw_name; + const struct firmware *fw; + + char version[RTL_VER_SIZE]; + int (*pre_fw)(struct r8152 *tp); + int (*post_fw)(struct r8152 *tp); + + bool retry; + } rtl_fw; + atomic_t rx_count; bool eee_en; @@ -788,6 +841,76 @@ struct r8152 { u8 autoneg; }; +/** + * struct fw_block - block type and total length + * @type: type of the current block, such as RTL_FW_END, RTL_FW_PLA, + * RTL_FW_USB and so on. + * @length: total length of the current block. + */ +struct fw_block { + __le32 type; + __le32 length; +} __packed; + +/** + * struct fw_header - header of the firmware file + * @checksum: checksum of sha256 which is calculated from the whole file + * except the checksum field of the file. That is, calculate sha256 + * from the version field to the end of the file. + * @version: version of this firmware. + * @blocks: the first firmware block of the file + */ +struct fw_header { + u8 checksum[32]; + char version[RTL_VER_SIZE]; + struct fw_block blocks[0]; +} __packed; + +/** + * struct fw_type_1 - a firmware block used by RTL_FW_PLA and RTL_FW_USB. + * The layout of the firmware block is: + * <struct fw_type_1> + <info> + <firmware data>. + * @fw_offset: offset of the firmware binary data. The start address of + * the data would be the address of struct fw_type_1 + @fw_offset. + * @fw_reg: the register to load the firmware. Depends on chip. + * @bp_ba_addr: the register to write break point base address. Depends on + * chip. + * @bp_ba_value: break point base address. Depends on chip. + * @bp_en_addr: the register to write break point enabled mask. Depends + * on chip. + * @bp_en_value: break point enabled mask. Depends on the firmware. + * @bp_start: the start register of break points. Depends on chip. + * @bp_num: the break point number which needs to be set for this firmware. + * Depends on the firmware. + * @bp: break points. Depends on firmware. + * @fw_ver_reg: the register to store the fw version. + * @fw_ver_data: the firmware version of the current type. + * @info: additional information for debugging, and is followed by the + * binary data of firmware. + */ +struct fw_type_1 { + struct fw_block blk_hdr; + __le16 fw_offset; + __le16 fw_reg; + __le16 bp_ba_addr; + __le16 bp_ba_value; + __le16 bp_en_addr; + __le16 bp_en_value; + __le16 bp_start; + __le16 bp_num; + __le16 bp[16]; /* any value determined by firmware */ + __le32 reserved; + __le16 fw_ver_reg; + u8 fw_ver_data; + char info[0]; +} __packed; + +enum rtl_fw_type { + RTL_FW_END = 0, + RTL_FW_PLA, + RTL_FW_USB, +}; + enum rtl_version { RTL_VER_UNKNOWN = 0, RTL_VER_01, @@ -1688,7 +1811,7 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp) } /* r8152_csum_workaround() - * The hw limites the value the transport offset. When the offset is out of the + * The hw limits the value of the transport offset. When the offset is out of * range, calculate the checksum by sw. */ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb, @@ -2178,6 +2301,7 @@ static void tx_bottom(struct r8152 *tp) int res; do { + struct net_device *netdev = tp->netdev; struct tx_agg *agg; if (skb_queue_empty(&tp->tx_queue)) @@ -2188,24 +2312,23 @@ static void tx_bottom(struct r8152 *tp) break; res = r8152_tx_agg_fill(tp, agg); - if (res) { - struct net_device *netdev = tp->netdev; + if (!res) + continue; - if (res == -ENODEV) { - rtl_set_unplug(tp); - netif_device_detach(netdev); - } else { - struct net_device_stats *stats = &netdev->stats; - unsigned long flags; + if (res == -ENODEV) { + rtl_set_unplug(tp); + netif_device_detach(netdev); + } else { + struct net_device_stats *stats = &netdev->stats; + unsigned long flags; - netif_warn(tp, tx_err, netdev, - "failed tx_urb %d\n", res); - stats->tx_dropped += agg->skb_num; + netif_warn(tp, tx_err, netdev, + "failed tx_urb %d\n", res); + stats->tx_dropped += agg->skb_num; - spin_lock_irqsave(&tp->tx_lock, flags); - list_add_tail(&agg->list, &tp->tx_free); - spin_unlock_irqrestore(&tp->tx_lock, flags); - } + spin_lock_irqsave(&tp->tx_lock, flags); + list_add_tail(&agg->list, &tp->tx_free); + spin_unlock_irqrestore(&tp->tx_lock, flags); } } while (res == 0); } @@ -3226,6 +3349,435 @@ static void rtl_reset_bmu(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); } +/* Clear the bp to stop the firmware before loading a new one */ +static void rtl_clear_bp(struct r8152 *tp, u16 type) +{ + switch (tp->version) { + case RTL_VER_01: + case RTL_VER_02: + case RTL_VER_07: + break; + case RTL_VER_03: + case RTL_VER_04: + case RTL_VER_05: + case RTL_VER_06: + ocp_write_byte(tp, type, PLA_BP_EN, 0); + break; + case RTL_VER_08: + case RTL_VER_09: + default: + if (type == MCU_TYPE_USB) { + ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0); + + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_8, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_9, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_10, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_11, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_12, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_13, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_14, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_15, 0); + } else { + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_BP_EN, 0); + } + break; + } + + ocp_write_word(tp, type, PLA_BP_0, 0); + ocp_write_word(tp, type, PLA_BP_1, 0); + ocp_write_word(tp, type, PLA_BP_2, 0); + ocp_write_word(tp, type, PLA_BP_3, 0); + ocp_write_word(tp, type, PLA_BP_4, 0); + ocp_write_word(tp, type, PLA_BP_5, 0); + ocp_write_word(tp, type, PLA_BP_6, 0); + ocp_write_word(tp, type, PLA_BP_7, 0); + + /* wait 3 ms to make sure the firmware is stopped */ + usleep_range(3000, 6000); + ocp_write_word(tp, type, PLA_BP_BA, 0); +} + +static bool rtl8152_is_fw_type1_ok(struct r8152 *tp, struct fw_type_1 *type1) +{ + u16 fw_reg, bp_ba_addr, bp_en_addr, bp_start; + bool rc = false; + u32 length, type; + int i, max_bp; + + type = __le32_to_cpu(type1->blk_hdr.type); + if (type == RTL_FW_PLA) { + switch (tp->version) { + case RTL_VER_01: + case RTL_VER_02: + case RTL_VER_07: + fw_reg = 0xf800; + bp_ba_addr = PLA_BP_BA; + bp_en_addr = 0; + bp_start = PLA_BP_0; + max_bp = 8; + break; + case RTL_VER_03: + case RTL_VER_04: + case RTL_VER_05: + case RTL_VER_06: + case RTL_VER_08: + case RTL_VER_09: + fw_reg = 0xf800; + bp_ba_addr = PLA_BP_BA; + bp_en_addr = PLA_BP_EN; + bp_start = PLA_BP_0; + max_bp = 8; + break; + default: + goto out; + } + } else if (type == RTL_FW_USB) { + switch (tp->version) { + case RTL_VER_03: + case RTL_VER_04: + case RTL_VER_05: + case RTL_VER_06: + fw_reg = 0xf800; + bp_ba_addr = USB_BP_BA; + bp_en_addr = USB_BP_EN; + bp_start = USB_BP_0; + max_bp = 8; + break; + case RTL_VER_08: + case RTL_VER_09: + fw_reg = 0xe600; + bp_ba_addr = USB_BP_BA; + bp_en_addr = USB_BP2_EN; + bp_start = USB_BP_0; + max_bp = 16; + break; + case RTL_VER_01: + case RTL_VER_02: + case RTL_VER_07: + default: + goto out; + } + } else { + goto out; + } + + length = __le32_to_cpu(type1->blk_hdr.length); + if (length < __le16_to_cpu(type1->fw_offset)) { + dev_err(&tp->intf->dev, "invalid fw_offset\n"); + goto out; + } + + length -= __le16_to_cpu(type1->fw_offset); + if (length < 4 || (length & 3)) { + dev_err(&tp->intf->dev, "invalid block length\n"); + goto out; + } + + if (__le16_to_cpu(type1->fw_reg) != fw_reg) { + dev_err(&tp->intf->dev, "invalid register to load firmware\n"); + goto out; + } + + if (__le16_to_cpu(type1->bp_ba_addr) != bp_ba_addr) { + dev_err(&tp->intf->dev, "invalid base address register\n"); + goto out; + } + + if (__le16_to_cpu(type1->bp_en_addr) != bp_en_addr) { + dev_err(&tp->intf->dev, "invalid enabled mask register\n"); + goto out; + } + + if (__le16_to_cpu(type1->bp_start) != bp_start) { + dev_err(&tp->intf->dev, + "invalid start register of break point\n"); + goto out; + } + + if (__le16_to_cpu(type1->bp_num) > max_bp) { + dev_err(&tp->intf->dev, "invalid break point number\n"); + goto out; + } + + for (i = __le16_to_cpu(type1->bp_num); i < max_bp; i++) { + if (type1->bp[i]) { + dev_err(&tp->intf->dev, "unused bp%u is not zero\n", i); + goto out; + } + } + + rc = true; +out: + return rc; +} + +/* Verify the checksum for the firmware file. It is calculated from the version + * field to the end of the file. Compare the result with the checksum field to + * make sure the file is correct. + */ +static long rtl8152_fw_verify_checksum(struct r8152 *tp, + struct fw_header *fw_hdr, size_t size) +{ + unsigned char checksum[sizeof(fw_hdr->checksum)]; + struct crypto_shash *alg; + struct shash_desc *sdesc; + size_t len; + long rc; + + alg = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(alg)) { + rc = PTR_ERR(alg); + goto out; + } + + if (crypto_shash_digestsize(alg) != sizeof(fw_hdr->checksum)) { + rc = -EFAULT; + dev_err(&tp->intf->dev, "digestsize incorrect (%u)\n", + crypto_shash_digestsize(alg)); + goto free_shash; + } + + len = sizeof(*sdesc) + crypto_shash_descsize(alg); + sdesc = kmalloc(len, GFP_KERNEL); + if (!sdesc) { + rc = -ENOMEM; + goto free_shash; + } + sdesc->tfm = alg; + + len = size - sizeof(fw_hdr->checksum); + rc = crypto_shash_digest(sdesc, fw_hdr->version, len, checksum); + kfree(sdesc); + if (rc) + goto free_shash; + + if (memcmp(fw_hdr->checksum, checksum, sizeof(fw_hdr->checksum))) { + dev_err(&tp->intf->dev, "checksum fail\n"); + rc = -EFAULT; + } + +free_shash: + crypto_free_shash(alg); +out: + return rc; +} + +static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw) +{ + const struct firmware *fw = rtl_fw->fw; + struct fw_header *fw_hdr = (struct fw_header *)fw->data; + struct fw_type_1 *pla = NULL, *usb = NULL; + long ret = -EFAULT; + int i; + + if (fw->size < sizeof(*fw_hdr)) { + dev_err(&tp->intf->dev, "file too small\n"); + goto fail; + } + + ret = rtl8152_fw_verify_checksum(tp, fw_hdr, fw->size); + if (ret) + goto fail; + + ret = -EFAULT; + + for (i = sizeof(*fw_hdr); i < fw->size;) { + struct fw_block *block = (struct fw_block *)&fw->data[i]; + u32 type; + + if ((i + sizeof(*block)) > fw->size) + goto fail; + + type = __le32_to_cpu(block->type); + switch (type) { + case RTL_FW_END: + if (__le32_to_cpu(block->length) != sizeof(*block)) + goto fail; + goto success; + case RTL_FW_PLA: + if (pla) { + dev_err(&tp->intf->dev, + "multiple PLA firmware encountered"); + goto fail; + } + + pla = (struct fw_type_1 *)block; + if (!rtl8152_is_fw_type1_ok(tp, pla)) { + dev_err(&tp->intf->dev, + "load PLA firmware failed\n"); + goto fail; + } + break; + case RTL_FW_USB: + if (usb) { + dev_err(&tp->intf->dev, + "multiple USB firmware encountered"); + goto fail; + } + + usb = (struct fw_type_1 *)block; + if (!rtl8152_is_fw_type1_ok(tp, usb)) { + dev_err(&tp->intf->dev, + "load USB firmware failed\n"); + goto fail; + } + break; + default: + dev_warn(&tp->intf->dev, "Unknown type %u is found\n", + type); + break; + } + + /* next block */ + i += ALIGN(__le32_to_cpu(block->length), 8); + } + +success: + return 0; +fail: + return ret; +} + +static void rtl8152_fw_type_1_apply(struct r8152 *tp, struct fw_type_1 *type1) +{ + u16 bp_en_addr, bp_index, type, bp_num, fw_ver_reg; + u32 length; + u8 *data; + int i; + + switch (__le32_to_cpu(type1->blk_hdr.type)) { + case RTL_FW_PLA: + type = MCU_TYPE_PLA; + break; + case RTL_FW_USB: + type = MCU_TYPE_USB; + break; + default: + return; + } + + rtl_clear_bp(tp, type); + + /* Enable backup/restore of MACDBG. This is required after clearing PLA + * break points and before applying the PLA firmware. + */ + if (tp->version == RTL_VER_04 && type == MCU_TYPE_PLA && + !(ocp_read_word(tp, MCU_TYPE_PLA, PLA_MACDBG_POST) & DEBUG_OE)) { + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MACDBG_PRE, DEBUG_LTSSM); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MACDBG_POST, DEBUG_LTSSM); + } + + length = __le32_to_cpu(type1->blk_hdr.length); + length -= __le16_to_cpu(type1->fw_offset); + + data = (u8 *)type1; + data += __le16_to_cpu(type1->fw_offset); + + generic_ocp_write(tp, __le16_to_cpu(type1->fw_reg), 0xff, length, data, + type); + + ocp_write_word(tp, type, __le16_to_cpu(type1->bp_ba_addr), + __le16_to_cpu(type1->bp_ba_value)); + + bp_index = __le16_to_cpu(type1->bp_start); + bp_num = __le16_to_cpu(type1->bp_num); + for (i = 0; i < bp_num; i++) { + ocp_write_word(tp, type, bp_index, __le16_to_cpu(type1->bp[i])); + bp_index += 2; + } + + bp_en_addr = __le16_to_cpu(type1->bp_en_addr); + if (bp_en_addr) + ocp_write_word(tp, type, bp_en_addr, + __le16_to_cpu(type1->bp_en_value)); + + fw_ver_reg = __le16_to_cpu(type1->fw_ver_reg); + if (fw_ver_reg) + ocp_write_byte(tp, MCU_TYPE_USB, fw_ver_reg, + type1->fw_ver_data); + + dev_dbg(&tp->intf->dev, "successfully applied %s\n", type1->info); +} + +static void rtl8152_apply_firmware(struct r8152 *tp) +{ + struct rtl_fw *rtl_fw = &tp->rtl_fw; + const struct firmware *fw = rtl_fw->fw; + struct fw_header *fw_hdr = (struct fw_header *)fw->data; + int i; + + if (IS_ERR_OR_NULL(rtl_fw->fw)) + return; + + if (rtl_fw->pre_fw) + rtl_fw->pre_fw(tp); + + for (i = offsetof(struct fw_header, blocks); i < fw->size;) { + struct fw_block *block = (struct fw_block *)&fw->data[i]; + + switch (__le32_to_cpu(block->type)) { + case RTL_FW_END: + goto post_fw; + case RTL_FW_PLA: + case RTL_FW_USB: + rtl8152_fw_type_1_apply(tp, (struct fw_type_1 *)block); + break; + default: + break; + } + + i += ALIGN(__le32_to_cpu(block->length), 8); + } + +post_fw: + if (rtl_fw->post_fw) + rtl_fw->post_fw(tp); + + strscpy(rtl_fw->version, fw_hdr->version, RTL_VER_SIZE); + dev_info(&tp->intf->dev, "load %s successfully\n", rtl_fw->version); +} + +static void rtl8152_release_firmware(struct r8152 *tp) +{ + struct rtl_fw *rtl_fw = &tp->rtl_fw; + + if (!IS_ERR_OR_NULL(rtl_fw->fw)) { + release_firmware(rtl_fw->fw); + rtl_fw->fw = NULL; + } +} + +static int rtl8152_request_firmware(struct r8152 *tp) +{ + struct rtl_fw *rtl_fw = &tp->rtl_fw; + long rc; + + if (rtl_fw->fw || !rtl_fw->fw_name) { + dev_info(&tp->intf->dev, "skip request firmware\n"); + rc = 0; + goto result; + } + + rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, &tp->intf->dev); + if (rc < 0) + goto result; + + rc = rtl8152_check_firmware(tp, rtl_fw); + if (rc < 0) + release_firmware(rtl_fw->fw); + +result: + if (rc) { + rtl_fw->fw = ERR_PTR(rc); + + dev_warn(&tp->intf->dev, + "unable to load firmware patch %s (%ld)\n", + rtl_fw->fw_name, rc); + } + + return rc; +} + static void r8152_aldps_en(struct r8152 *tp, bool enable) { if (enable) { @@ -3370,6 +3922,7 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { + rtl8152_apply_firmware(tp); rtl_eee_enable(tp, tp->eee_en); r8152_aldps_en(tp, true); r8152b_enable_fc(tp); @@ -3377,11 +3930,23 @@ static void r8152b_hw_phy_cfg(struct r8152 *tp) set_bit(PHY_RESET, &tp->flags); } -static void r8152b_exit_oob(struct r8152 *tp) +static void wait_oob_link_list_ready(struct r8152 *tp) { u32 ocp_data; int i; + for (i = 0; i < 1000; i++) { + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); + if (ocp_data & LINK_LIST_READY) + break; + usleep_range(1000, 2000); + } +} + +static void r8152b_exit_oob(struct r8152 *tp) +{ + u32 ocp_data; + ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); ocp_data &= ~RCR_ACPT_ALL; ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); @@ -3399,23 +3964,13 @@ static void r8152b_exit_oob(struct r8152 *tp) ocp_data &= ~MCU_BORW_EN; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); ocp_data |= RE_INIT_LL; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); rtl8152_nic_reset(tp); @@ -3457,7 +4012,6 @@ static void r8152b_exit_oob(struct r8152 *tp) static void r8152b_enter_oob(struct r8152 *tp) { u32 ocp_data; - int i; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; @@ -3469,23 +4023,13 @@ static void r8152b_enter_oob(struct r8152 *tp) rtl_disable(tp); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); ocp_data |= RE_INIT_LL; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8152_RMS); @@ -3533,6 +4077,126 @@ static int r8153_patch_request(struct r8152 *tp, bool request) } } +static int r8153_pre_firmware_1(struct r8152 *tp) +{ + int i; + + /* Wait till the WTD timer is ready. It would take at most 104 ms. */ + for (i = 0; i < 104; i++) { + u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL); + + if (!(ocp_data & WTD1_EN)) + break; + usleep_range(1000, 2000); + } + + return 0; +} + +static int r8153_post_firmware_1(struct r8152 *tp) +{ + /* set USB_BP_4 to support USB_SPEED_SUPER only */ + if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER) + ocp_write_word(tp, MCU_TYPE_USB, USB_BP_4, BP4_SUPER_ONLY); + + /* reset UPHY timer to 36 ms */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_UPHY_TIMER, 36000 / 16); + + return 0; +} + +static int r8153_pre_firmware_2(struct r8152 *tp) +{ + u32 ocp_data; + + r8153_pre_firmware_1(tp); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0); + ocp_data &= ~FW_FIX_SUSPEND; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data); + + return 0; +} + +static int r8153_post_firmware_2(struct r8152 *tp) +{ + u32 ocp_data; + + /* enable bp0 if support USB_SPEED_SUPER only */ + if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER) { + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN); + ocp_data |= BIT(0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data); + } + + /* reset UPHY timer to 36 ms */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_UPHY_TIMER, 36000 / 16); + + /* enable U3P3 check, set the counter to 4 */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, U3P3_CHECK_EN | 4); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0); + ocp_data |= FW_FIX_SUSPEND; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data); + + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY); + ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND; + ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data); + + return 0; +} + +static int r8153_post_firmware_3(struct r8152 *tp) +{ + u32 ocp_data; + + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY); + ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND; + ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1); + ocp_data |= FW_IP_RESET_EN; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data); + + return 0; +} + +static int r8153b_pre_firmware_1(struct r8152 *tp) +{ + /* enable fc timer and set timer to 1 second. */ + ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER, + CTRL_TIMER_EN | (1000 / 8)); + + return 0; +} + +static int r8153b_post_firmware_1(struct r8152 *tp) +{ + u32 ocp_data; + + /* enable bp0 for RTL8153-BND */ + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1); + if (ocp_data & BND_MASK) { + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN); + ocp_data |= BIT(0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data); + } + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL); + ocp_data |= FLOW_CTRL_PATCH_OPT; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); + ocp_data |= FC_PATCH_TASK; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1); + ocp_data |= FW_IP_RESET_EN; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data); + + return 0; +} + static void r8153_aldps_en(struct r8152 *tp, bool enable) { u16 data; @@ -3567,6 +4231,8 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); + rtl8152_apply_firmware(tp); + if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3639,6 +4305,8 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); + rtl8152_apply_firmware(tp); + r8153b_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); data = sram_read(tp, SRAM_GREEN_CFG); @@ -3711,7 +4379,6 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) static void r8153_first_init(struct r8152 *tp) { u32 ocp_data; - int i; r8153_mac_clk_spd(tp, false); rxdy_gated_en(tp, true); @@ -3732,23 +4399,13 @@ static void r8153_first_init(struct r8152 *tp) ocp_data &= ~MCU_BORW_EN; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); ocp_data |= RE_INIT_LL; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX); @@ -3773,7 +4430,6 @@ static void r8153_first_init(struct r8152 *tp) static void r8153_enter_oob(struct r8152 *tp) { u32 ocp_data; - int i; r8153_mac_clk_spd(tp, true); @@ -3784,23 +4440,13 @@ static void r8153_enter_oob(struct r8152 *tp) rtl_disable(tp); rtl_reset_bmu(tp); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); ocp_data |= RE_INIT_LL; ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); - for (i = 0; i < 1000; i++) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - if (ocp_data & LINK_LIST_READY) - break; - usleep_range(1000, 2000); - } + wait_oob_link_list_ready(tp); ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data); @@ -4187,11 +4833,22 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work) mutex_lock(&tp->control); + if (rtl8152_request_firmware(tp) == -ENODEV && tp->rtl_fw.retry) { + tp->rtl_fw.retry = false; + tp->rtl_fw.fw = NULL; + + /* Delay execution in case request_firmware() is not ready yet. + */ + queue_delayed_work(system_long_wq, &tp->hw_phy_work, HZ * 10); + goto ignore_once; + } + tp->rtl_ops.hw_phy_cfg(tp); rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex, tp->advertising); +ignore_once: mutex_unlock(&tp->control); usb_autopm_put_interface(tp->intf); @@ -4229,6 +4886,11 @@ static int rtl8152_open(struct net_device *netdev) struct r8152 *tp = netdev_priv(netdev); int res = 0; + if (work_busy(&tp->hw_phy_work.work) & WORK_BUSY_PENDING) { + cancel_delayed_work_sync(&tp->hw_phy_work); + rtl_hw_phy_work_func_t(&tp->hw_phy_work.work); + } + res = alloc_all_mem(tp); if (res) goto out; @@ -4875,6 +5537,9 @@ static void rtl8152_get_drvinfo(struct net_device *netdev, strlcpy(info->driver, MODULENAME, sizeof(info->driver)); strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(tp->udev, info->bus_info, sizeof(info->bus_info)); + if (!IS_ERR_OR_NULL(tp->rtl_fw.fw)) + strlcpy(info->fw_version, tp->rtl_fw.version, + sizeof(info->fw_version)); } static @@ -5499,6 +6164,47 @@ static int rtl_ops_init(struct r8152 *tp) return ret; } +#define FIRMWARE_8153A_2 "rtl_nic/rtl8153a-2.fw" +#define FIRMWARE_8153A_3 "rtl_nic/rtl8153a-3.fw" +#define FIRMWARE_8153A_4 "rtl_nic/rtl8153a-4.fw" +#define FIRMWARE_8153B_2 "rtl_nic/rtl8153b-2.fw" + +MODULE_FIRMWARE(FIRMWARE_8153A_2); +MODULE_FIRMWARE(FIRMWARE_8153A_3); +MODULE_FIRMWARE(FIRMWARE_8153A_4); +MODULE_FIRMWARE(FIRMWARE_8153B_2); + +static int rtl_fw_init(struct r8152 *tp) +{ + struct rtl_fw *rtl_fw = &tp->rtl_fw; + + switch (tp->version) { + case RTL_VER_04: + rtl_fw->fw_name = FIRMWARE_8153A_2; + rtl_fw->pre_fw = r8153_pre_firmware_1; + rtl_fw->post_fw = r8153_post_firmware_1; + break; + case RTL_VER_05: + rtl_fw->fw_name = FIRMWARE_8153A_3; + rtl_fw->pre_fw = r8153_pre_firmware_2; + rtl_fw->post_fw = r8153_post_firmware_2; + break; + case RTL_VER_06: + rtl_fw->fw_name = FIRMWARE_8153A_4; + rtl_fw->post_fw = r8153_post_firmware_3; + break; + case RTL_VER_09: + rtl_fw->fw_name = FIRMWARE_8153B_2; + rtl_fw->pre_fw = r8153b_pre_firmware_1; + rtl_fw->post_fw = r8153b_post_firmware_1; + break; + default: + break; + } + + return 0; +} + static u8 rtl_get_version(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); @@ -5606,6 +6312,8 @@ static int rtl8152_probe(struct usb_interface *intf, if (ret) goto out; + rtl_fw_init(tp); + mutex_init(&tp->control); INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t); INIT_DELAYED_WORK(&tp->hw_phy_work, rtl_hw_phy_work_func_t); @@ -5633,7 +6341,8 @@ static int rtl8152_probe(struct usb_interface *intf, } if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && - (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) { + (!strcmp(udev->serial, "000001000000") || + !strcmp(udev->serial, "000002000000"))) { dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); } @@ -5676,6 +6385,10 @@ static int rtl8152_probe(struct usb_interface *intf, intf->needs_remote_wakeup = 1; tp->rtl_ops.init(tp); +#if IS_BUILTIN(CONFIG_USB_RTL8152) + /* Retry in case request_firmware() is not ready yet. */ + tp->rtl_fw.retry = true; +#endif queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); set_ethernet_addr(tp); @@ -5721,6 +6434,7 @@ static void rtl8152_disconnect(struct usb_interface *intf) tasklet_kill(&tp->tx_tl); cancel_delayed_work_sync(&tp->hw_phy_work); tp->rtl_ops.unload(tp); + rtl8152_release_firmware(tp); free_netdev(tp->netdev); } } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a82ad739ab80..791f6633667c 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1674,7 +1674,7 @@ static int ath9k_htc_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_TX_START: ret = ath9k_htc_tx_aggr_oper(priv, vif, sta, action, tid); if (!ret) - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 34121fbf32e3..0548aa3702e3 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1921,7 +1921,7 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, ath9k_ps_wakeup(sc); ret = ath_tx_aggr_start(sc, sta, tid, ssn); if (!ret) - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; ath9k_ps_restore(sc); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 40a8054f8aa6..5914926a5c5b 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1449,8 +1449,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw, rcu_assign_pointer(sta_info->agg[tid], tid_info); spin_unlock_bh(&ar->tx_ampdu_list_lock); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 79998a3ddb7a..a276dae30887 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1084,6 +1084,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action = params->action; u16 tid = params->tid; u16 *ssn = ¶ms->ssn; + int ret = 0; wcn36xx_dbg(WCN36XX_DBG_MAC, "mac ampdu action action %d tid %d\n", action, tid); @@ -1106,7 +1107,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, sta_priv->ampdu_state[tid] = WCN36XX_AMPDU_START; spin_unlock_bh(&sta_priv->ampdu_lock); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_OPERATIONAL: spin_lock_bh(&sta_priv->ampdu_lock); @@ -1131,7 +1132,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, mutex_unlock(&wcn->conf_mutex); - return 0; + return ret; } static const struct ieee80211_ops wcn36xx_ops = { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 6188275b17e5..8e8b685cfe09 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -850,8 +850,7 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw, "START: tid %d is not agg\'able\n", tid); return -EINVAL; } - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index ffb705b18fb1..51fdd7ce30af 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -2265,7 +2265,7 @@ il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif, if (tid_data->tfds_in_queue == 0) { D_HT("HW queue is empty\n"); tid_data->agg.state = IL_AGG_ON; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { D_HT("HW queue is NOT empty: %d packets in HW queue\n", tid_data->tfds_in_queue); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c index 3029e3f6de63..cd73fc5cfcbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c @@ -621,7 +621,7 @@ int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif, IWL_DEBUG_TX_QUEUES(priv, "Can proceed: ssn = next_recl = %d\n", tid_data->agg.ssn); tid_data->agg.state = IWL_AGG_STARTING; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { IWL_DEBUG_TX_QUEUES(priv, "Can't proceed: ssn %d, " "next_reclaimed = %d\n", diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0bedba4c61f2..1d6bc62b104c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2818,13 +2818,12 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (normalized_ssn == tid_data->next_reclaimed) { tid_data->state = IWL_AGG_STARTING; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; } else { tid_data->state = IWL_EMPTYING_HW_QUEUE_ADDBA; + ret = 0; } - ret = 0; - out: spin_unlock_bh(&mvmsta->lock); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 14f562cd715c..31ae6c4be3a7 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -148,23 +148,25 @@ static const char *hwsim_alpha2s[] = { }; static const struct ieee80211_regdomain hwsim_world_regdom_custom_01 = { - .n_reg_rules = 4, + .n_reg_rules = 5, .alpha2 = "99", .reg_rules = { REG_RULE(2412-10, 2462+10, 40, 0, 20, 0), REG_RULE(2484-10, 2484+10, 40, 0, 20, 0), REG_RULE(5150-10, 5240+10, 40, 0, 30, 0), REG_RULE(5745-10, 5825+10, 40, 0, 30, 0), + REG_RULE(5855-10, 5925+10, 40, 0, 33, 0), } }; static const struct ieee80211_regdomain hwsim_world_regdom_custom_02 = { - .n_reg_rules = 2, + .n_reg_rules = 3, .alpha2 = "99", .reg_rules = { REG_RULE(2412-10, 2462+10, 40, 0, 20, 0), REG_RULE(5725-10, 5850+10, 40, 0, 30, NL80211_RRF_NO_IR), + REG_RULE(5855-10, 5925+10, 40, 0, 33, 0), } }; @@ -354,6 +356,24 @@ static const struct ieee80211_channel hwsim_channels_5ghz[] = { CHAN5G(5805), /* Channel 161 */ CHAN5G(5825), /* Channel 165 */ CHAN5G(5845), /* Channel 169 */ + + CHAN5G(5855), /* Channel 171 */ + CHAN5G(5860), /* Channel 172 */ + CHAN5G(5865), /* Channel 173 */ + CHAN5G(5870), /* Channel 174 */ + + CHAN5G(5875), /* Channel 175 */ + CHAN5G(5880), /* Channel 176 */ + CHAN5G(5885), /* Channel 177 */ + CHAN5G(5890), /* Channel 178 */ + CHAN5G(5895), /* Channel 179 */ + CHAN5G(5900), /* Channel 180 */ + CHAN5G(5905), /* Channel 181 */ + + CHAN5G(5910), /* Channel 182 */ + CHAN5G(5915), /* Channel 183 */ + CHAN5G(5920), /* Channel 184 */ + CHAN5G(5925), /* Channel 185 */ }; static const struct ieee80211_rate hwsim_rates[] = { @@ -1550,7 +1570,8 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_MESH_POINT && - vif->type != NL80211_IFTYPE_ADHOC) + vif->type != NL80211_IFTYPE_ADHOC && + vif->type != NL80211_IFTYPE_OCB) return; skb = ieee80211_beacon_get(hw, vif); @@ -1604,6 +1625,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer) } static const char * const hwsim_chanwidths[] = { + [NL80211_CHAN_WIDTH_5] = "ht5", + [NL80211_CHAN_WIDTH_10] = "ht10", [NL80211_CHAN_WIDTH_20_NOHT] = "noht", [NL80211_CHAN_WIDTH_20] = "ht20", [NL80211_CHAN_WIDTH_40] = "ht40", @@ -1979,8 +2002,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: @@ -2723,7 +2745,8 @@ static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband) BIT(NL80211_IFTYPE_P2P_CLIENT) | \ BIT(NL80211_IFTYPE_P2P_GO) | \ BIT(NL80211_IFTYPE_ADHOC) | \ - BIT(NL80211_IFTYPE_MESH_POINT)) + BIT(NL80211_IFTYPE_MESH_POINT) | \ + BIT(NL80211_IFTYPE_OCB)) static int mac80211_hwsim_new_radio(struct genl_info *info, struct hwsim_new_radio_params *param) @@ -2847,6 +2870,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, } else { data->if_combination.num_different_channels = 1; data->if_combination.radar_detect_widths = + BIT(NL80211_CHAN_WIDTH_5) | + BIT(NL80211_CHAN_WIDTH_10) | BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index c4db6417748f..d55f229abeea 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -5520,7 +5520,7 @@ mwl8k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, rc = -EBUSY; break; } - ieee80211_start_tx_ba_cb_irqsafe(vif, addr, tid); + rc = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 25d5b1608bc9..4b3217b43a04 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -582,8 +582,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 87c748715b5d..b6d78212306a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -477,8 +477,7 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; mt7615_mcu_set_tx_ba(dev, params, 0); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index aec73a0295e8..414b22399d93 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -393,8 +393,7 @@ int mt76x02_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn); - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index 72e608cc53af..671d8897ae76 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -372,8 +372,7 @@ mt76_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_START: msta->agg_ssn[tid] = ssn << 4; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index f1cdcd61c54a..25466454b73e 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -10476,7 +10476,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, * when the hw reorders frames due to aggregation. */ if (sta_priv->wcid > WCID_END) - return 1; + return -ENOSPC; switch (action) { case IEEE80211_AMPDU_RX_START: @@ -10489,7 +10489,7 @@ int rt2800_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, */ break; case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ret = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ac746c322554..c75192c4447f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1776,8 +1776,7 @@ int rtl_tx_agg_start(struct ieee80211_hw *hw, struct ieee80211_vif *vif, tid_data->agg.agg_state = RTL_AGG_START; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - return 0; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; } int rtl_tx_agg_stop(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index e5e3605bb693..a203b4705b94 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -437,8 +437,7 @@ static int rtw_ops_ampdu_action(struct ieee80211_hw *hw, switch (params->action) { case IEEE80211_AMPDU_TX_START: - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; + return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index ce5e92d82efc..440088293aff 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1140,8 +1140,7 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, else if ((vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_P2P_GO)) rsta->seq_start[tid] = seq_no; - ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); - status = 0; + status = IEEE80211_AMPDU_TX_START_IMMEDIATE; break; case IEEE80211_AMPDU_TX_STOP_CONT: diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 0f22379887ca..18cd96284b77 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -278,7 +278,6 @@ static struct i2c_driver nfcmrvl_i2c_driver = { .remove = nfcmrvl_i2c_remove, .driver = { .name = "nfcmrvl_i2c", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(of_nfcmrvl_i2c_match), }, }; diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index e4f7fa00862d..b4eb926d220a 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -279,7 +279,6 @@ MODULE_DEVICE_TABLE(of, of_s3fwrn5_i2c_match); static struct i2c_driver s3fwrn5_i2c_driver = { .driver = { - .owner = THIS_MODULE, .name = S3FWRN5_I2C_DRIVER_NAME, .of_match_table = of_match_ptr(of_s3fwrn5_i2c_match), }, diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c index 0dcfdc806f57..82d31ba32690 100644 --- a/drivers/ptp/ptp_dte.c +++ b/drivers/ptp/ptp_dte.c @@ -240,14 +240,12 @@ static int ptp_dte_probe(struct platform_device *pdev) { struct ptp_dte *ptp_dte; struct device *dev = &pdev->dev; - struct resource *res; ptp_dte = devm_kzalloc(dev, sizeof(struct ptp_dte), GFP_KERNEL); if (!ptp_dte) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ptp_dte->regs = devm_ioremap_resource(dev, res); + ptp_dte->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ptp_dte->regs)) return PTR_ERR(ptp_dte->regs); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f9502dbbb5c1..9bb36c32cbf9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1171,6 +1171,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, spi_statistics_add_transfer_stats(statm, xfer, ctlr); spi_statistics_add_transfer_stats(stats, xfer, ctlr); + if (!ctlr->ptp_sts_supported) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + if (xfer->tx_buf || xfer->rx_buf) { reinit_completion(&ctlr->xfer_completion); @@ -1197,6 +1202,11 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, xfer->len); } + if (!ctlr->ptp_sts_supported) { + ptp_read_system_postts(xfer->ptp_sts); + xfer->ptp_sts_word_post = xfer->len; + } + trace_spi_transfer_stop(msg, xfer); if (msg->status != -EINPROGRESS) @@ -1265,6 +1275,7 @@ EXPORT_SYMBOL_GPL(spi_finalize_current_transfer); */ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) { + struct spi_transfer *xfer; struct spi_message *msg; bool was_busy = false; unsigned long flags; @@ -1391,6 +1402,13 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) goto out; } + if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + } + ret = ctlr->transfer_one_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, @@ -1419,6 +1437,99 @@ static void spi_pump_messages(struct kthread_work *work) } /** + * spi_take_timestamp_pre - helper for drivers to collect the beginning of the + * TX timestamp for the requested byte from the SPI + * transfer. The frequency with which this function + * must be called (once per word, once for the whole + * transfer, once per batch of words etc) is arbitrary + * as long as the @tx buffer offset is greater than or + * equal to the requested byte at the time of the + * call. The timestamp is only taken once, at the + * first such call. It is assumed that the driver + * advances its @tx buffer pointer monotonically. + * @ctlr: Pointer to the spi_controller structure of the driver + * @xfer: Pointer to the transfer being timestamped + * @tx: Pointer to the current word within the xfer->tx_buf that the driver is + * preparing to transmit right now. + * @irqs_off: If true, will disable IRQs and preemption for the duration of the + * transfer, for less jitter in time measurement. Only compatible + * with PIO drivers. If true, must follow up with + * spi_take_timestamp_post or otherwise system will crash. + * WARNING: for fully predictable results, the CPU frequency must + * also be under control (governor). + */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off) +{ + u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8); + + if (!xfer->ptp_sts) + return; + + if (xfer->timestamped_pre) + return; + + if (tx < (xfer->tx_buf + xfer->ptp_sts_word_pre * bytes_per_word)) + return; + + /* Capture the resolution of the timestamp */ + xfer->ptp_sts_word_pre = (tx - xfer->tx_buf) / bytes_per_word; + + xfer->timestamped_pre = true; + + if (irqs_off) { + local_irq_save(ctlr->irq_flags); + preempt_disable(); + } + + ptp_read_system_prets(xfer->ptp_sts); +} +EXPORT_SYMBOL_GPL(spi_take_timestamp_pre); + +/** + * spi_take_timestamp_post - helper for drivers to collect the end of the + * TX timestamp for the requested byte from the SPI + * transfer. Can be called with an arbitrary + * frequency: only the first call where @tx exceeds + * or is equal to the requested word will be + * timestamped. + * @ctlr: Pointer to the spi_controller structure of the driver + * @xfer: Pointer to the transfer being timestamped + * @tx: Pointer to the current word within the xfer->tx_buf that the driver has + * just transmitted. + * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU. + */ +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off) +{ + u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8); + + if (!xfer->ptp_sts) + return; + + if (xfer->timestamped_post) + return; + + if (tx < (xfer->tx_buf + xfer->ptp_sts_word_post * bytes_per_word)) + return; + + ptp_read_system_postts(xfer->ptp_sts); + + if (irqs_off) { + local_irq_restore(ctlr->irq_flags); + preempt_enable(); + } + + /* Capture the resolution of the timestamp */ + xfer->ptp_sts_word_post = (tx - xfer->tx_buf) / bytes_per_word; + + xfer->timestamped_post = true; +} +EXPORT_SYMBOL_GPL(spi_take_timestamp_post); + +/** * spi_set_thread_rt - set the controller to pump at realtime priority * @ctlr: controller to boost priority of * @@ -1503,6 +1614,7 @@ EXPORT_SYMBOL_GPL(spi_get_next_queued_message); */ void spi_finalize_current_message(struct spi_controller *ctlr) { + struct spi_transfer *xfer; struct spi_message *mesg; unsigned long flags; int ret; @@ -1511,6 +1623,13 @@ void spi_finalize_current_message(struct spi_controller *ctlr) mesg = ctlr->cur_msg; spin_unlock_irqrestore(&ctlr->queue_lock, flags); + if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { + list_for_each_entry(xfer, &mesg->transfers, transfer_list) { + ptp_read_system_postts(xfer->ptp_sts); + xfer->ptp_sts_word_post = xfer->len; + } + } + spi_unmap_msg(ctlr, mesg); if (ctlr->cur_msg_prepared && ctlr->unprepare_message) { @@ -3273,6 +3392,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) static int __spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; + struct spi_transfer *xfer; /* * Some controllers do not support doing regular SPI transfers. Return @@ -3288,6 +3408,13 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message) trace_spi_message_submit(message); + if (!ctlr->ptp_sts_supported) { + list_for_each_entry(xfer, &message->transfers, transfer_list) { + xfer->ptp_sts_word_pre = 0; + ptp_read_system_prets(xfer->ptp_sts); + } + } + return ctlr->transfer(spi, message); } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b9d22338606..282e28bf41ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -363,7 +363,7 @@ struct bpf_prog_stats { u64 cnt; u64 nsecs; struct u64_stats_sync syncp; -}; +} __aligned(2 * sizeof(u64)); struct bpf_prog_aux { atomic_t refcnt; diff --git a/include/linux/dim.h b/include/linux/dim.h index 9fa4b3f88c39..b698266d0035 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -4,22 +4,26 @@ #ifndef DIM_H #define DIM_H +#include <linux/bits.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/types.h> +#include <linux/workqueue.h> -/** +/* * Number of events between DIM iterations. * Causes a moderation of the algorithm run. */ #define DIM_NEVENTS 64 -/** +/* * Is a difference between values justifies taking an action. * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ (((100UL * abs((val) - (ref))) / (ref)) > 10) -/** +/* * Calculate the gap between two values. * Take wrap-around and variable size into consideration. */ @@ -27,12 +31,13 @@ & (BIT_ULL(bits) - 1)) /** - * Structure for CQ moderation values. + * struct dim_cq_moder - Structure for CQ moderation values. * Used for communications between DIM and its consumer. * * @usec: CQ timer suggestion (by DIM) * @pkts: CQ packet counter suggestion (by DIM) - * @cq_period_mode: CQ priod count mode (from CQE/EQE) + * @comps: Completion counter + * @cq_period_mode: CQ period count mode (from CQE/EQE) */ struct dim_cq_moder { u16 usec; @@ -42,13 +47,14 @@ struct dim_cq_moder { }; /** - * Structure for DIM sample data. + * struct dim_sample - Structure for DIM sample data. * Used for communications between DIM and its consumer. * * @time: Sample timestamp * @pkt_ctr: Number of packets * @byte_ctr: Number of bytes * @event_ctr: Number of events + * @comp_ctr: Current completion counter */ struct dim_sample { ktime_t time; @@ -59,12 +65,14 @@ struct dim_sample { }; /** - * Structure for DIM stats. + * struct dim_stats - Structure for DIM stats. * Used for holding current measured rates. * * @ppms: Packets per msec * @bpms: Bytes per msec * @epms: Events per msec + * @cpms: Completions per msec + * @cpe_ratio: Ratio of completions to events */ struct dim_stats { int ppms; /* packets per msec */ @@ -75,12 +83,13 @@ struct dim_stats { }; /** - * Main structure for dynamic interrupt moderation (DIM). + * struct dim - Main structure for dynamic interrupt moderation (DIM). * Used for holding all information about a specific DIM instance. * * @state: Algorithm state (see below) * @prev_stats: Measured rates from previous iteration (for comparison) * @start_sample: Sampled data at start of current iteration + * @measuring_sample: A &dim_sample that is used to update the current events * @work: Work to perform on action required * @priv: A pointer to the struct that points to dim * @profile_ix: Current moderation profile @@ -106,24 +115,21 @@ struct dim { }; /** - * enum dim_cq_period_mode - * - * These are the modes for CQ period count. + * enum dim_cq_period_mode - Modes for CQ period count * * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) * @DIM_CQ_PERIOD_NUM_MODES: Number of modes */ -enum { +enum dim_cq_period_mode { DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_NUM_MODES }; /** - * enum dim_state + * enum dim_state - DIM algorithm states * - * These are the DIM algorithm states. * These will determine if the algorithm is in a valid state to start an iteration. * * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) @@ -131,16 +137,15 @@ enum { * need to perform an action * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure */ -enum { +enum dim_state { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS, DIM_APPLY_NEW_PROFILE, }; /** - * enum dim_tune_state + * enum dim_tune_state - DIM algorithm tune states * - * These are the DIM algorithm tune states. * These will determine which action the algorithm should perform. * * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference @@ -148,7 +153,7 @@ enum { * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels */ -enum { +enum dim_tune_state { DIM_PARKING_ON_TOP, DIM_PARKING_TIRED, DIM_GOING_RIGHT, @@ -156,25 +161,23 @@ enum { }; /** - * enum dim_stats_state + * enum dim_stats_state - DIM algorithm statistics states * - * These are the DIM algorithm statistics states. * These will determine the verdict of current iteration. * * @DIM_STATS_WORSE: Current iteration shows worse performance than before - * @DIM_STATS_WORSE: Current iteration shows same performance than before - * @DIM_STATS_WORSE: Current iteration shows better performance than before + * @DIM_STATS_SAME: Current iteration shows same performance than before + * @DIM_STATS_BETTER: Current iteration shows better performance than before */ -enum { +enum dim_stats_state { DIM_STATS_WORSE, DIM_STATS_SAME, DIM_STATS_BETTER, }; /** - * enum dim_step_result + * enum dim_step_result - DIM algorithm step results * - * These are the DIM algorithm step results. * These describe the result of a step. * * @DIM_STEPPED: Performed a regular step @@ -182,7 +185,7 @@ enum { * tired parking * @DIM_ON_EDGE: Stepped to the most left/right profile */ -enum { +enum dim_step_result { DIM_STEPPED, DIM_TOO_TIRED, DIM_ON_EDGE, @@ -199,7 +202,7 @@ enum { bool dim_on_top(struct dim *dim); /** - * dim_turn - change profile alterning direction + * dim_turn - change profile altering direction * @dim: DIM context * * Go left if we were going right and vice-versa. @@ -238,7 +241,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats); /** - * dim_update_sample - set a sample's fields with give values + * dim_update_sample - set a sample's fields with given values * @event_ctr: number of events to set * @packets: number of packets to set * @bytes: number of bytes to set @@ -304,8 +307,8 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * @end_sample: Current data measurement * * Called by the consumer. - * This is the main logic of the algorithm, where data is processed in order to decide on next - * required action. + * This is the main logic of the algorithm, where data is processed in order + * to decide on next required action. */ void net_dim(struct dim *dim, struct dim_sample end_sample); diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index a99c58866860..fe740031339d 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -82,4 +82,10 @@ static inline int linkmode_equal(const unsigned long *src1, return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline int linkmode_subset(const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_subset(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + #endif /* __LINKMODE_H */ diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 34de06b426ef..8071148f29a6 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -47,16 +47,16 @@ struct vif_entry_notifier_info { }; static inline int mr_call_vif_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, - unsigned short vif_index, u32 tb_id) + unsigned short vif_index, u32 tb_id, + struct netlink_ext_ack *extack) { struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, + .extack = extack, }, .dev = vif->dev, .vif_index = vif_index, @@ -64,7 +64,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .tb_id = tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_vif_notifiers(struct net *net, @@ -77,7 +77,6 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .dev = vif->dev, .vif_index = vif_index, @@ -173,21 +172,21 @@ struct mfc_entry_notifier_info { }; static inline int mr_call_mfc_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, - struct mr_mfc *mfc, u32 tb_id) + struct mr_mfc *mfc, u32 tb_id, + struct netlink_ext_ack *extack) { struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, + .extack = extack, }, .mfc = mfc, .tb_id = tb_id }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_mfc_notifiers(struct net *net, @@ -199,7 +198,6 @@ static inline int mr_call_mfc_notifiers(struct net *net, struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .mfc = mfc, .tb_id = tb_id @@ -301,10 +299,11 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock); + rwlock_t *mrt_lock, struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -355,10 +354,11 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, static inline int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock) + rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { return -EINVAL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c31d1f7..3207e0b9ec4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -925,6 +925,16 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; +struct netdev_name_node { + struct hlist_node hlist; + struct list_head list; + struct net_device *dev; + const char *name; +}; + +int netdev_name_node_alt_create(struct net_device *dev, const char *name); +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1564,7 +1574,7 @@ enum netdev_priv_flags { * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. * - * @name_hlist: Device name hash chain, please keep it close to name[] + * @name_node: Name hashlist node * @ifalias: SNMP alias * @mem_end: Shared memory end * @mem_start: Shared memory start @@ -1774,7 +1784,7 @@ enum netdev_priv_flags { struct net_device { char name[IFNAMSIZ]; - struct hlist_node name_hlist; + struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; /* * I/O specific fields @@ -2494,6 +2504,9 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +int unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb); struct netdev_notifier_info { struct net_device *dev; @@ -2564,6 +2577,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue_reverse(net, d) \ + list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \ + dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ @@ -4097,9 +4113,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name); - #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 1c35428e98bc..355a08a76fd4 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -508,9 +508,9 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); -struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, - void *upstream, - const struct sfp_upstream_ops *ops); +struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, + void *upstream, + const struct sfp_upstream_ops *ops); void sfp_unregister_upstream(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, @@ -553,11 +553,11 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_register_upstream( +static inline struct sfp_bus *sfp_register_upstream_node( struct fwnode_handle *fwnode, void *upstream, const struct sfp_upstream_ops *ops) { - return (struct sfp_bus *)-1; + return NULL; } static inline void sfp_unregister_upstream(struct sfp_bus *bus) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7914fdaf4226..f7ae12a1d680 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2261,12 +2261,12 @@ static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); } -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) - return 1; + return true; if (unlikely(len > skb->len)) - return 0; + return false; return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e4b3fb4bb77c..fe80d537945d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -28,13 +28,14 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - bool copy[MAX_MSG_FRAGS]; + unsigned long copy; /* The extra element is used for chaining the front and sections when * the list becomes partitioned (e.g. end < start). The crypto APIs * require the chaining. */ struct scatterlist data[MAX_MSG_FRAGS + 1]; }; +static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -227,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (msg->sg.copy[msg->sg.start]) { + if (test_bit(msg->sg.start, &msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -246,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - msg->sg.copy[msg->sg.end] = true; + __set_bit(msg->sg.end, &msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -254,7 +255,10 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { - msg->sg.copy[i] = copy_state; + if (copy_state) + __set_bit(i, &msg->sg.copy); + else + __clear_bit(i, &msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af4f265d0f67..27f6b046cf92 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -13,6 +13,7 @@ #include <linux/completion.h> #include <linux/scatterlist.h> #include <linux/gpio/consumer.h> +#include <linux/ptp_clock_kernel.h> struct dma_chan; struct property_entry; @@ -409,6 +410,12 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. + * @ptp_sts_supported: If the driver sets this to true, it must provide a + * time snapshot in @spi_transfer->ptp_sts as close as possible to the + * moment in time when @spi_transfer->ptp_sts_word_pre and + * @spi_transfer->ptp_sts_word_post were transmitted. + * If the driver does not set this, the SPI core takes the snapshot as + * close to the driver hand-over as possible. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -604,6 +611,15 @@ struct spi_controller { void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); + + /* + * Driver sets this field to indicate it is able to snapshot SPI + * transfers (needed e.g. for reading the time of POSIX clocks) + */ + bool ptp_sts_supported; + + /* Interrupt enable state during PTP system timestamping */ + unsigned long irq_flags; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -644,6 +660,14 @@ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ct extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); +/* Helper calls for driver to timestamp transfer */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); + /* the spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -753,6 +777,35 @@ extern void spi_res_release(struct spi_controller *ctlr, * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset + * within @tx_buf for which the SPI device is requesting that the time + * snapshot for this transfer begins. Upon completing the SPI transfer, + * this value may have changed compared to what was requested, depending + * on the available snapshotting resolution (DMA transfer, + * @ptp_sts_supported is false, etc). + * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning + * that a single byte should be snapshotted). + * If the core takes care of the timestamp (if @ptp_sts_supported is false + * for this controller), it will set @ptp_sts_word_pre to 0, and + * @ptp_sts_word_post to the length of the transfer. This is done + * purposefully (instead of setting to spi_transfer->len - 1) to denote + * that a transfer-level snapshot taken from within the driver may still + * be of higher quality. + * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * PTP system timestamp structure may lie. If drivers use PIO or their + * hardware has some sort of assist for retrieving exact transfer timing, + * they can (and should) assert @ptp_sts_supported and populate this + * structure using the ptp_read_system_*ts helper functions. + * The timestamp must represent the time at which the SPI slave device has + * processed the word, i.e. the "pre" timestamp should be taken before + * transmitting the "pre" word, and the "post" timestamp after receiving + * transmit confirmation from the controller for the "post" word. + * @timestamped_pre: Set by the SPI controller driver to denote it has acted + * upon the @ptp_sts request. Not set when the SPI core has taken care of + * the task. SPI device drivers are free to print a warning if this comes + * back unset and they need the better resolution. + * @timestamped_post: See above. The reason why both exist is that these + * booleans are also used to keep state in the core SPI logic. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. @@ -842,6 +895,14 @@ struct spi_transfer { u32 effective_speed_hz; + unsigned int ptp_sts_word_pre; + unsigned int ptp_sts_word_post; + + struct ptp_system_timestamp *ptp_sts; + + bool timestamped_pre; + bool timestamped_post; + struct list_head transfer_list; }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dc60d03c4b60..86f9464c3f5d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -92,6 +92,7 @@ struct stmmac_dma_cfg { int fixed_burst; int mixed_burst; bool aal; + bool eame; }; #define AXI_BLEN 7 diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3f62b347b04a..1bab88184d3c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -202,11 +202,11 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ -static inline int ipv6_mc_may_pull(struct sk_buff *skb, - unsigned int len) +static inline bool ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) - return 0; + return false; return pskb_may_pull(skb, len); } diff --git a/include/net/devlink.h b/include/net/devlink.h index 23e4b65ec9df..6bf3b9e0595a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -39,6 +39,7 @@ struct devlink { possible_net_t _net; struct mutex lock; bool reload_failed; + bool registered; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -506,11 +507,13 @@ enum devlink_health_reporter_state { struct devlink_health_reporter_ops { char *name; int (*recover)(struct devlink_health_reporter *reporter, - void *priv_ctx); + void *priv_ctx, struct netlink_ext_ack *extack); int (*dump)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx); + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack); int (*diagnose)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg); + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack); }; /** @@ -643,7 +646,7 @@ enum devlink_trap_group_generic_id { } struct devlink_ops { - int (*reload_down)(struct devlink *devlink, + int (*reload_down)(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack); int (*reload_up)(struct devlink *devlink, struct netlink_ext_ack *extack); @@ -771,6 +774,8 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev) struct ib_device; +struct net *devlink_net(const struct devlink *devlink); +void devlink_net_set(struct devlink *devlink, struct net *net); struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); diff --git a/include/net/dsa.h b/include/net/dsa.h index 541fb514e31d..8c3ea0530f65 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -94,8 +94,6 @@ struct __dsa_skb_cb { u8 priv[48 - sizeof(struct dsa_skb_cb)]; }; -#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb)) - #define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) #define DSA_SKB_CB_PRIV(skb) \ diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index c49d7bfb5c30..6d59221ff05a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -8,7 +8,6 @@ struct module; struct fib_notifier_info { - struct net *net; int family; struct netlink_ext_ack *extack; }; @@ -30,19 +29,21 @@ struct fib_notifier_ops { int family; struct list_head list; unsigned int (*fib_seq_read)(struct net *net); - int (*fib_dump)(struct net *net, struct notifier_block *nb); + int (*fib_dump)(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct module *owner; struct rcu_head rcu; }; -int call_fib_notifier(struct notifier_block *nb, struct net *net, +int call_fib_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct net *net, struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + struct netlink_ext_ack *extack); +int unregister_fib_notifier(struct net *net, struct notifier_block *nb); struct fib_notifier_ops * fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 20dcadd8eed9..54e227e6b06a 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -194,7 +194,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); -int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, + struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9292f1c588b7..74950663bb00 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -75,8 +75,6 @@ struct genl_family { struct module *module; }; -struct nlattr **genl_family_attrbuf(const struct genl_family *family); - /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -128,6 +126,24 @@ enum genl_validate_flags { }; /** + * struct genl_info - info that is available during dumpit op call + * @family: generic netlink family - for internal genl code usage + * @ops: generic netlink ops - for internal genl code usage + * @attrs: netlink attributes + */ +struct genl_dumpit_info { + const struct genl_family *family; + const struct genl_ops *ops; + struct nlattr **attrs; +}; + +static inline const struct genl_dumpit_info * +genl_dumpit_info(struct netlink_callback *cb) +{ + return cb->data; +} + +/** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 4b5656c71abc..5d1615463138 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -478,7 +478,7 @@ struct ipv6_route_iter { extern const struct seq_operations ipv6_route_seq_ops; -int call_fib6_notifier(struct notifier_block *nb, struct net *net, +int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, @@ -488,7 +488,8 @@ int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); -int fib6_tables_dump(struct net *net, struct notifier_block *nb); +int fib6_tables_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); @@ -504,7 +505,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); -int fib6_rules_dump(struct net *net, struct notifier_block *nb); +int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, @@ -537,7 +539,8 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } -static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ab1ca9e238d2..52b2406a5dfc 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -219,7 +219,7 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -int call_fib4_notifier(struct notifier_block *nb, struct net *net, +int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, @@ -229,7 +229,8 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); -void fib_notify(struct net *net, struct notifier_block *nb); +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; @@ -315,7 +316,8 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } -static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -377,7 +379,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); -int fib4_rules_dump(struct net *net, struct notifier_block *nb); +int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 009605c56f20..d04b7abe2a4c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -696,6 +696,11 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) +{ + return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); +} + static inline u32 ipv6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 523c6a09e1c8..d69081c38788 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3095,7 +3095,9 @@ enum ieee80211_filter_flags { * * @IEEE80211_AMPDU_RX_START: start RX aggregation * @IEEE80211_AMPDU_RX_STOP: stop RX aggregation - * @IEEE80211_AMPDU_TX_START: start TX aggregation + * @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either + * call ieee80211_start_tx_ba_cb_irqsafe() or return the special + * status %IEEE80211_AMPDU_TX_START_IMMEDIATE. * @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational * @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting * queued packets, now unaggregated. After all packets are transmitted the @@ -3119,6 +3121,8 @@ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_TX_OPERATIONAL, }; +#define IEEE80211_AMPDU_TX_START_IMMEDIATE 1 + /** * struct ieee80211_ampdu_params - AMPDU action parameters * @@ -3896,7 +3900,10 @@ struct ieee80211_ops { * * Even ``189`` would be wrong since 1 could be lost again. * - * Returns a negative error code on failure. + * Returns a negative error code on failure. The driver may return + * %IEEE80211_AMPDU_TX_START_IMMEDIATE for %IEEE80211_AMPDU_TX_START + * if the session can start immediately. + * * The callback can sleep. */ int (*ampdu_action)(struct ieee80211_hw *hw, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4c2cd9378699..c5d682992e38 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -36,6 +36,7 @@ #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> +#include <linux/notifier.h> struct user_namespace; struct proc_dir_entry; @@ -104,6 +105,8 @@ struct net { struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; + struct raw_notifier_head netdev_chain; + /* Note that @hash_mix can be read millions times per second, * it is critical that it is on a read_mostly cache line. */ @@ -326,7 +329,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) /* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) - +#define for_each_net_continue_reverse(VAR) \ + list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 830bdf345b17..b5fdb108d602 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -24,6 +24,9 @@ struct netns_mib { #ifdef CONFIG_XFRM_STATISTICS DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); #endif +#if IS_ENABLED(CONFIG_TLS) + DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics); +#endif }; #endif diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index e1a92c4610f3..0b032b92da0b 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -80,13 +80,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( struct sctp_chunk *chunk, gfp_t gfp); -struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( - const struct sctp_association *asoc, - const struct sockaddr_storage *aaddr, - int flags, - int state, - int error, - gfp_t gfp); +void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, + int state, int error); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, @@ -100,6 +95,13 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( __u32 error, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event( + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + __u16 flags, + __u32 error, + gfp_t gfp); + struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, diff --git a/include/net/smc.h b/include/net/smc.h index bd9c0fb3b577..05174ae4f325 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -75,6 +75,9 @@ struct smcd_dev { struct workqueue_struct *event_wq; u8 pnetid[SMC_MAX_PNETID_LEN]; bool pnetid_by_user; + struct list_head lgr_list; + spinlock_t lgr_lock; + u8 going_away : 1; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/include/net/snmp.h b/include/net/snmp.h index cb8ced4380a6..468a67836e2f 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -111,6 +111,12 @@ struct linux_xfrm_mib { unsigned long mibs[LINUX_MIB_XFRMMAX]; }; +/* Linux TLS */ +#define LINUX_MIB_TLSMAX __LINUX_MIB_TLSMAX +struct linux_tls_mib { + unsigned long mibs[LINUX_MIB_TLSMAX]; +}; + #define DEFINE_SNMP_STAT(type, name) \ __typeof__(type) __percpu *name #define DEFINE_SNMP_STAT_ATOMIC(type, name) \ diff --git a/include/net/sock.h b/include/net/sock.h index f69b58bff7e5..380312cc67a9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2523,7 +2523,7 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -void sock_enable_timestamp(struct sock *sk, int flag); +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/include/net/tls.h b/include/net/tls.h index c664e6dba0d1..41265e542e71 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -43,6 +43,7 @@ #include <linux/netdevice.h> #include <linux/rcupdate.h> +#include <net/net_namespace.h> #include <net/tcp.h> #include <net/strparser.h> #include <crypto/aead.h> @@ -60,7 +61,6 @@ #define TLS_RECORD_TYPE_DATA 0x17 #define TLS_AAD_SPACE_SIZE 13 -#define TLS_DEVICE_NAME_MAX 32 #define MAX_IV_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 @@ -74,36 +74,14 @@ */ #define TLS_AES_CCM_IV_B0_BYTE 2 -/* - * This structure defines the routines for Inline TLS driver. - * The following routines are optional and filled with a - * null pointer if not defined. - * - * @name: Its the name of registered Inline tls device - * @dev_list: Inline tls device list - * int (*feature)(struct tls_device *device); - * Called to return Inline TLS driver capability - * - * int (*hash)(struct tls_device *device, struct sock *sk); - * This function sets Inline driver for listen and program - * device specific functioanlity as required - * - * void (*unhash)(struct tls_device *device, struct sock *sk); - * This function cleans listen state set by Inline TLS driver - * - * void (*release)(struct kref *kref); - * Release the registered device and allocated resources - * @kref: Number of reference to tls_device - */ -struct tls_device { - char name[TLS_DEVICE_NAME_MAX]; - struct list_head dev_list; - int (*feature)(struct tls_device *device); - int (*hash)(struct tls_device *device, struct sock *sk); - void (*unhash)(struct tls_device *device, struct sock *sk); - void (*release)(struct kref *kref); - struct kref kref; -}; +#define __TLS_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define TLS_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define __TLS_DEC_STATS(net, field) \ + __SNMP_DEC_STATS((net)->mib.tls_statistics, field) +#define TLS_DEC_STATS(net, field) \ + SNMP_DEC_STATS((net)->mib.tls_statistics, field) enum { TLS_BASE, @@ -158,7 +136,7 @@ struct tls_sw_context_tx { struct list_head tx_list; atomic_t encrypt_pending; int async_notify; - int async_capable; + u8 async_capable:1; #define BIT_TX_SCHEDULED 0 #define BIT_TX_CLOSING 1 @@ -174,8 +152,8 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 control; - int async_capable; - bool decrypted; + u8 async_capable:1; + u8 decrypted:1; atomic_t decrypt_pending; bool async_notify; }; @@ -340,7 +318,10 @@ struct tls_offload_context_rx { #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) +struct tls_context *tls_ctx_create(struct sock *sk); void tls_ctx_free(struct sock *sk, struct tls_context *ctx); +void update_sk_prot(struct sock *sk, struct tls_context *ctx); + int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); @@ -623,13 +604,6 @@ tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) tls_offload_ctx_rx(tls_ctx)->resync_type = type; } -static inline void tls_offload_tx_resync_request(struct sock *sk) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - - WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); -} - /* Driver's seq tracking has to be disabled until resync succeeded */ static inline bool tls_offload_tx_resync_pending(struct sock *sk) { @@ -641,10 +615,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk) return ret; } +int __net_init tls_proc_init(struct net *net); +void __net_exit tls_proc_fini(struct net *net); + int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); -void tls_register_device(struct tls_device *device); -void tls_unregister_device(struct tls_device *device); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); @@ -665,7 +640,9 @@ void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); +void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq); +int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm); #else static inline void tls_device_init(void) {} static inline void tls_device_cleanup(void) {} @@ -688,7 +665,9 @@ static inline void tls_device_offload_cleanup_rx(struct sock *sk) {} static inline void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {} -static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) +static inline int +tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) { return 0; } diff --git a/include/net/tls_toe.h b/include/net/tls_toe.h new file mode 100644 index 000000000000..b3aa7593ce2c --- /dev/null +++ b/include/net/tls_toe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <linux/list.h> + +struct sock; + +#define TLS_TOE_DEVICE_NAME_MAX 32 + +/* + * This structure defines the routines for Inline TLS driver. + * The following routines are optional and filled with a + * null pointer if not defined. + * + * @name: Its the name of registered Inline tls device + * @dev_list: Inline tls device list + * int (*feature)(struct tls_toe_device *device); + * Called to return Inline TLS driver capability + * + * int (*hash)(struct tls_toe_device *device, struct sock *sk); + * This function sets Inline driver for listen and program + * device specific functioanlity as required + * + * void (*unhash)(struct tls_toe_device *device, struct sock *sk); + * This function cleans listen state set by Inline TLS driver + * + * void (*release)(struct kref *kref); + * Release the registered device and allocated resources + * @kref: Number of reference to tls_toe_device + */ +struct tls_toe_device { + char name[TLS_TOE_DEVICE_NAME_MAX]; + struct list_head dev_list; + int (*feature)(struct tls_toe_device *device); + int (*hash)(struct tls_toe_device *device, struct sock *sk); + void (*unhash)(struct tls_toe_device *device, struct sock *sk); + void (*release)(struct kref *kref); + struct kref kref; +}; + +int tls_toe_bypass(struct sock *sk); +int tls_toe_hash(struct sock *sk); +void tls_toe_unhash(struct sock *sk); + +void tls_toe_register_device(struct tls_toe_device *device); +void tls_toe_unregister_device(struct tls_toe_device *device); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77c6be96d676..a65c3b0c6935 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -794,7 +794,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(char *buf, u32 size_of_buf) + * int bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -1023,7 +1023,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1068,7 +1068,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) + * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1085,7 +1085,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) + * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1154,7 +1154,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1172,7 +1172,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * 0 * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1595,7 +1595,7 @@ union bpf_attr { * Return * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. * - * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1715,7 +1715,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1947,7 +1947,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -1980,7 +1980,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2033,7 +2033,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) + * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2392,7 +2392,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2408,9 +2408,9 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description - * Will remove *pop* bytes from a *msg* starting at byte *start*. + * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation @@ -2505,7 +2505,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buf *skb) + * int bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 580b7a2e40e1..b558ea88b766 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -421,6 +421,10 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */ + DEVLINK_ATTR_NETNS_FD, /* u32 */ + DEVLINK_ATTR_NETNS_PID, /* u32 */ + DEVLINK_ATTR_NETNS_ID, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8938b76c4ee3..d4591792f0b4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1507,6 +1507,11 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67, ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68, + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT = 69, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT = 70, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS @@ -1618,6 +1623,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_56000 56000 #define SPEED_100000 100000 #define SPEED_200000 200000 +#define SPEED_400000 400000 #define SPEED_UNKNOWN -1 diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 7fea0fd7d6f5..4bf33344aab1 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -33,6 +33,7 @@ #define IFNAMSIZ 16 #endif /* __UAPI_DEF_IF_IFNAMSIZ */ #define IFALIASZ 256 +#define ALTIFNAMSIZ 128 #include <linux/hdlc/ioctl.h> /* For glibc compatibility. An empty enum does not compile. */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4a8c02cafa9a..8aec8769d944 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -167,6 +167,8 @@ enum { IFLA_NEW_IFINDEX, IFLA_MIN_MTU, IFLA_MAX_MTU, + IFLA_PROP_LIST, + IFLA_ALT_IFNAME, /* Alternative ifname */ __IFLA_MAX }; diff --git a/include/uapi/linux/netfilter_arp/arp_tables.h b/include/uapi/linux/netfilter_arp/arp_tables.h index a2a0927d9bd6..bbf5af2b67a8 100644 --- a/include/uapi/linux/netfilter_arp/arp_tables.h +++ b/include/uapi/linux/netfilter_arp/arp_tables.h @@ -199,7 +199,7 @@ struct arpt_get_entries { /* Helper functions */ static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index 8076c940ffeb..a494cf43a755 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -194,7 +194,7 @@ struct ebt_entry { static __inline__ struct ebt_entry_target * ebt_get_target(struct ebt_entry *e) { - return (void *)e + e->target_offset; + return (struct ebt_entry_target *)((char *)e + e->target_offset); } /* {g,s}etsockopt numbers */ diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h index 6aaeb14bfce1..50c7fee625ae 100644 --- a/include/uapi/linux/netfilter_ipv4/ip_tables.h +++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h @@ -222,7 +222,7 @@ struct ipt_get_entries { static __inline__ struct xt_entry_target * ipt_get_target(struct ipt_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h index 031d0a43bed2..d9e364f96a5c 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -262,7 +262,7 @@ struct ip6t_get_entries { static __inline__ struct xt_entry_target * ip6t_get_target(struct ip6t_entry *e) { - return (void *)e + e->target_offset; + return (struct xt_entry_target *)((char *)e + e->target_offset); } /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index beee59c831a7..64135ab3a7ac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -571,6 +571,14 @@ * set of BSSID,frequency parameters is used (i.e., either the enforcing * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT). + * Driver shall not modify the IEs specified through %NL80211_ATTR_IE if + * %NL80211_ATTR_MAC is included. However, if %NL80211_ATTR_MAC_HINT is + * included, these IEs through %NL80211_ATTR_IE are specified by the user + * space based on the best possible BSS selected. Thus, if the driver ends + * up selecting a different BSS, it can modify these IEs accordingly (e.g. + * userspace asks the driver to perform PMKSA caching with BSS1 and the + * driver ends up selecting BSS2 with different PMKSA cache entry; RSNIE + * has to get updated with the apt PMKID). * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within * the ESS in case the device is already associated and an association with * a different BSS is desired. diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index ce2a623abb75..1418a8362bb7 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -164,6 +164,13 @@ enum { RTM_GETNEXTHOP, #define RTM_GETNEXTHOP RTM_GETNEXTHOP + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6d5b164af55c..6bce7f9837a9 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -449,6 +449,16 @@ struct sctp_send_failed { __u8 ssf_data[0]; }; +struct sctp_send_failed_event { + __u16 ssf_type; + __u16 ssf_flags; + __u32 ssf_length; + __u32 ssf_error; + struct sctp_sndinfo ssfe_info; + sctp_assoc_t ssf_assoc_id; + __u8 ssf_data[0]; +}; + /* * ssf_flags: 16 bits (unsigned integer) * @@ -605,6 +615,7 @@ struct sctp_event_subscribe { __u8 sctp_stream_reset_event; __u8 sctp_assoc_reset_event; __u8 sctp_stream_change_event; + __u8 sctp_send_failure_event_event; }; /* @@ -632,6 +643,7 @@ union sctp_notification { struct sctp_stream_reset_event sn_strreset_event; struct sctp_assoc_reset_event sn_assocreset_event; struct sctp_stream_change_event sn_strchange_event; + struct sctp_send_failed_event sn_send_failed_event; }; /* Section 5.3.1 @@ -667,7 +679,9 @@ enum sctp_sn_type { #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT SCTP_STREAM_CHANGE_EVENT, #define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT - SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT, + SCTP_SEND_FAILED_EVENT, +#define SCTP_SEND_FAILED_EVENT SCTP_SEND_FAILED_EVENT + SCTP_SN_TYPE_MAX = SCTP_SEND_FAILED_EVENT, #define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX }; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 549a31c29f7d..7eee233e78d2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -323,4 +323,21 @@ enum __LINUX_MIB_XFRMMAX }; +/* linux TLS mib definitions */ +enum +{ + LINUX_MIB_TLSNUM = 0, + LINUX_MIB_TLSCURRTXSW, /* TlsCurrTxSw */ + LINUX_MIB_TLSCURRRXSW, /* TlsCurrRxSw */ + LINUX_MIB_TLSCURRTXDEVICE, /* TlsCurrTxDevice */ + LINUX_MIB_TLSCURRRXDEVICE, /* TlsCurrRxDevice */ + LINUX_MIB_TLSTXSW, /* TlsTxSw */ + LINUX_MIB_TLSRXSW, /* TlsRxSw */ + LINUX_MIB_TLSTXDEVICE, /* TlsTxDevice */ + LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ + LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ + LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ + __LINUX_MIB_TLSMAX +}; + #endif /* _LINUX_SNMP_H */ diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4955e1a9f1bc..4dfc05651c98 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -309,7 +309,7 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_ptr->tlv_len = htons(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); - memset(TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); + memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); } return TLV_SPACE(len); } @@ -409,7 +409,7 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, tcm_hdr->tcm_flags = htons(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); - memset(TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); + memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); } return TCM_SPACE(data_len); } diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 4c4e24c291a5..559f42e73315 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -169,7 +169,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, { vr->num = num; vr->desc = p; - vr->avail = p + num*sizeof(struct vring_desc); + vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc)); vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } diff --git a/init/do_mounts.c b/init/do_mounts.c index 9634ecf3743d..af9cda887a23 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -212,6 +212,7 @@ static int match_dev_by_label(struct device *dev, const void *data) * a colon. * 9) PARTLABEL=<name> with name being the GPT partition label. * MSDOS partitions do not support labels! + * 10) /dev/cifs represents Root_CIFS (0xfe) * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -268,6 +269,9 @@ dev_t name_to_dev_t(const char *name) res = Root_NFS; if (strcmp(name, "nfs") == 0) goto done; + res = Root_CIFS; + if (strcmp(name, "cifs") == 0) + goto done; res = Root_RAM0; if (strcmp(name, "ram") == 0) goto done; @@ -501,6 +505,42 @@ static int __init mount_nfs_root(void) } #endif +#ifdef CONFIG_CIFS_ROOT + +extern int cifs_root_data(char **dev, char **opts); + +#define CIFSROOT_TIMEOUT_MIN 5 +#define CIFSROOT_TIMEOUT_MAX 30 +#define CIFSROOT_RETRY_MAX 5 + +static int __init mount_cifs_root(void) +{ + char *root_dev, *root_data; + unsigned int timeout; + int try, err; + + err = cifs_root_data(&root_dev, &root_data); + if (err != 0) + return 0; + + timeout = CIFSROOT_TIMEOUT_MIN; + for (try = 1; ; try++) { + err = do_mount_root(root_dev, "cifs", root_mountflags, + root_data); + if (err == 0) + return 1; + if (try > CIFSROOT_RETRY_MAX) + break; + + ssleep(timeout); + timeout <<= 1; + if (timeout > CIFSROOT_TIMEOUT_MAX) + timeout = CIFSROOT_TIMEOUT_MAX; + } + return 0; +} +#endif + #if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) void __init change_floppy(char *fmt, ...) { @@ -542,6 +582,15 @@ void __init mount_root(void) ROOT_DEV = Root_FD0; } #endif +#ifdef CONFIG_CIFS_ROOT + if (ROOT_DEV == Root_CIFS) { + if (mount_cifs_root()) + return; + + printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n"); + ROOT_DEV = Root_FD0; + } +#endif #ifdef CONFIG_BLK_DEV_FD if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { /* rd_doload is 2 for a dual initrd/ramload setup */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ffc3e53f5300..d3446f018b9a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2739,6 +2739,41 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) reg->smax_value = reg->umax_value; } +static bool bpf_map_is_rdonly(const struct bpf_map *map) +{ + return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; +} + +static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) +{ + void *ptr; + u64 addr; + int err; + + err = map->ops->map_direct_value_addr(map, &addr, off); + if (err) + return err; + ptr = (void *)(long)addr + off; + + switch (size) { + case sizeof(u8): + *val = (u64)*(u8 *)ptr; + break; + case sizeof(u16): + *val = (u64)*(u16 *)ptr; + break; + case sizeof(u32): + *val = (u64)*(u32 *)ptr; + break; + case sizeof(u64): + *val = *(u64 *)ptr; + break; + default: + return -EINVAL; + } + return 0; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -2776,9 +2811,27 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err; err = check_map_access(env, regno, off, size, false); - if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(env, regs, value_regno); + if (!err && t == BPF_READ && value_regno >= 0) { + struct bpf_map *map = reg->map_ptr; + + /* if map is read-only, track its contents as scalars */ + if (tnum_is_const(reg->var_off) && + bpf_map_is_rdonly(map) && + map->ops->map_direct_value_addr) { + int map_off = off + reg->var_off.value; + u64 val = 0; + err = bpf_map_direct_read(map, map_off, size, + &val); + if (err) + return err; + + regs[value_regno].type = SCALAR_VALUE; + __mark_reg_known(®s[value_regno], val); + } else { + mark_reg_unknown(env, regs, value_regno); + } + } } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; diff --git a/net/caif/Kconfig b/net/caif/Kconfig index eb83051c8330..b7532a79ca7a 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -13,11 +13,11 @@ menuconfig CAIF with its modems. It is accessed from user space as sockets (PF_CAIF). Say Y (or M) here if you build for a phone product (e.g. Android or - MeeGo ) that uses CAIF as transport, if unsure say N. + MeeGo) that uses CAIF as transport. If unsure say N. If you select to build it as module then CAIF_NETDEV also needs to be - built as modules. You will also need to say yes to any CAIF physical - devices that your platform requires. + built as a module. You will also need to say Y (or M) to any CAIF + physical devices that your platform requires. See Documentation/networking/caif for a further explanation on how to use and configure CAIF. @@ -37,7 +37,7 @@ config CAIF_NETDEV default CAIF ---help--- Say Y if you will be using a CAIF based GPRS network device. - This can be either built-in or a loadable module, + This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say Y. @@ -48,7 +48,7 @@ config CAIF_USB default n ---help--- Say Y if you are using CAIF over USB CDC NCM. - This can be either built-in or a loadable module, + This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say N. diff --git a/net/core/dev.c b/net/core/dev.c index bf3ed413abaf..74f593986524 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -228,6 +228,122 @@ static inline void rps_unlock(struct softnet_data *sd) #endif } +static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev, + const char *name) +{ + struct netdev_name_node *name_node; + + name_node = kmalloc(sizeof(*name_node), GFP_KERNEL); + if (!name_node) + return NULL; + INIT_HLIST_NODE(&name_node->hlist); + name_node->dev = dev; + name_node->name = name; + return name_node; +} + +static struct netdev_name_node * +netdev_name_node_head_alloc(struct net_device *dev) +{ + struct netdev_name_node *name_node; + + name_node = netdev_name_node_alloc(dev, dev->name); + if (!name_node) + return NULL; + INIT_LIST_HEAD(&name_node->list); + return name_node; +} + +static void netdev_name_node_free(struct netdev_name_node *name_node) +{ + kfree(name_node); +} + +static void netdev_name_node_add(struct net *net, + struct netdev_name_node *name_node) +{ + hlist_add_head_rcu(&name_node->hlist, + dev_name_hash(net, name_node->name)); +} + +static void netdev_name_node_del(struct netdev_name_node *name_node) +{ + hlist_del_rcu(&name_node->hlist); +} + +static struct netdev_name_node *netdev_name_node_lookup(struct net *net, + const char *name) +{ + struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *name_node; + + hlist_for_each_entry(name_node, head, hlist) + if (!strcmp(name_node->name, name)) + return name_node; + return NULL; +} + +static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, + const char *name) +{ + struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *name_node; + + hlist_for_each_entry_rcu(name_node, head, hlist) + if (!strcmp(name_node->name, name)) + return name_node; + return NULL; +} + +int netdev_name_node_alt_create(struct net_device *dev, const char *name) +{ + struct netdev_name_node *name_node; + struct net *net = dev_net(dev); + + name_node = netdev_name_node_lookup(net, name); + if (name_node) + return -EEXIST; + name_node = netdev_name_node_alloc(dev, name); + if (!name_node) + return -ENOMEM; + netdev_name_node_add(net, name_node); + /* The node that holds dev->name acts as a head of per-device list. */ + list_add_tail(&name_node->list, &dev->name_node->list); + + return 0; +} +EXPORT_SYMBOL(netdev_name_node_alt_create); + +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) +{ + list_del(&name_node->list); + netdev_name_node_del(name_node); + kfree(name_node->name); + netdev_name_node_free(name_node); +} + +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) +{ + struct netdev_name_node *name_node; + struct net *net = dev_net(dev); + + name_node = netdev_name_node_lookup(net, name); + if (!name_node) + return -ENOENT; + __netdev_name_node_alt_destroy(name_node); + + return 0; +} +EXPORT_SYMBOL(netdev_name_node_alt_destroy); + +static void netdev_name_node_alt_flush(struct net_device *dev) +{ + struct netdev_name_node *name_node, *tmp; + + list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) + __netdev_name_node_alt_destroy(name_node); +} + /* Device list insertion */ static void list_netdevice(struct net_device *dev) { @@ -237,7 +353,7 @@ static void list_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + netdev_name_node_add(net, dev->name_node); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); @@ -255,7 +371,7 @@ static void unlist_netdevice(struct net_device *dev) /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); list_del_rcu(&dev->dev_list); - hlist_del_rcu(&dev->name_hlist); + netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); @@ -733,14 +849,10 @@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct net_device *dev; - struct hlist_head *head = dev_name_hash(net, name); + struct netdev_name_node *node_name; - hlist_for_each_entry(dev, head, name_hlist) - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; - - return NULL; + node_name = netdev_name_node_lookup(net, name); + return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(__dev_get_by_name); @@ -758,14 +870,10 @@ EXPORT_SYMBOL(__dev_get_by_name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct net_device *dev; - struct hlist_head *head = dev_name_hash(net, name); - - hlist_for_each_entry_rcu(dev, head, name_hlist) - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; + struct netdev_name_node *node_name; - return NULL; + node_name = netdev_name_node_lookup_rcu(net, name); + return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(dev_get_by_name_rcu); @@ -1141,8 +1249,8 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name) +static int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net); @@ -1158,7 +1266,6 @@ int dev_get_valid_name(struct net *net, struct net_device *dev, return 0; } -EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device @@ -1232,13 +1339,13 @@ rollback: netdev_adjacent_rename_links(dev, oldname); write_lock_bh(&dev_base_lock); - hlist_del_rcu(&dev->name_hlist); + netdev_name_node_del(dev->name_node); write_unlock_bh(&dev_base_lock); synchronize_rcu(); write_lock_bh(&dev_base_lock); - hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + netdev_name_node_add(net, dev->name_node); write_unlock_bh(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); @@ -1617,6 +1724,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, return nb->notifier_call(nb, val, &info); } +static int call_netdevice_register_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + int err; + + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + return err; + + if (!(dev->flags & IFF_UP)) + return 0; + + call_netdevice_notifier(nb, NETDEV_UP, dev); + return 0; +} + +static void call_netdevice_unregister_notifiers(struct notifier_block *nb, + struct net_device *dev) +{ + if (dev->flags & IFF_UP) { + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); + } + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); +} + +static int call_netdevice_register_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + err = call_netdevice_register_notifiers(nb, dev); + if (err) + goto rollback; + } + return 0; + +rollback: + for_each_netdev_continue_reverse(net, dev) + call_netdevice_unregister_notifiers(nb, dev); + return err; +} + +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, + struct net *net) +{ + struct net_device *dev; + + for_each_netdev(net, dev) + call_netdevice_unregister_notifiers(nb, dev); +} + static int dev_boot_phase = 1; /** @@ -1635,8 +1798,6 @@ static int dev_boot_phase = 1; int register_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; - struct net_device *last; struct net *net; int err; @@ -1649,17 +1810,9 @@ int register_netdevice_notifier(struct notifier_block *nb) if (dev_boot_phase) goto unlock; for_each_net(net) { - for_each_netdev(net, dev) { - err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - call_netdevice_notifier(nb, NETDEV_UP, dev); - } + err = call_netdevice_register_net_notifiers(nb, net); + if (err) + goto rollback; } unlock: @@ -1668,22 +1821,9 @@ unlock: return err; rollback: - last = dev; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev == last) - goto outroll; + for_each_net_continue_reverse(net) + call_netdevice_unregister_net_notifiers(nb, net); - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } - -outroll: raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@ -1734,6 +1874,80 @@ unlock: EXPORT_SYMBOL(unregister_netdevice_notifier); /** + * register_netdevice_notifier_net - register a per-netns network notifier block + * @net: network namespace + * @nb: notifier + * + * Register a notifier to be called when network device events occur. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + * + * When registered all registration and up events are replayed + * to the new notifier to allow device to have a race free + * view of the network device list. + */ + +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(&net->netdev_chain, nb); + if (err) + goto unlock; + if (dev_boot_phase) + goto unlock; + + err = call_netdevice_register_net_notifiers(nb, net); + if (err) + goto chain_unregister; + +unlock: + rtnl_unlock(); + return err; + +chain_unregister: + raw_notifier_chain_unregister(&netdev_chain, nb); + goto unlock; +} +EXPORT_SYMBOL(register_netdevice_notifier_net); + +/** + * unregister_netdevice_notifier_net - unregister a per-netns + * network notifier block + * @net: network namespace + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_netdevice_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + * + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code. + */ + +int unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); + if (err) + goto unlock; + + call_netdevice_unregister_net_notifiers(nb, net); + +unlock: + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_net); + +/** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function * @info: notifier information data @@ -1745,7 +1959,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); static int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info) { + struct net *net = dev_net(info->dev); + int ret; + ASSERT_RTNL(); + + /* Run per-netns notifier block chain first, then run the global one. + * Hopefully, one day, the global one is going to be removed after + * all notifier block registrators get converted to be per-netns. + */ + ret = raw_notifier_call_chain(&net->netdev_chain, val, info); + if (ret & NOTIFY_STOP_MASK) + return ret; return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -2771,7 +2996,7 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) void netif_schedule_queue(struct netdev_queue *txq) { rcu_read_lock(); - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { + if (!netif_xmit_stopped(txq)) { struct Qdisc *q = rcu_dereference(txq->qdisc); __netif_schedule(q); @@ -2939,12 +3164,9 @@ int skb_checksum_help(struct sk_buff *skb) offset += skb->csum_offset; BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); + if (ret) + goto out; *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: @@ -2979,12 +3201,11 @@ int skb_crc32c_csum_help(struct sk_buff *skb) ret = -EINVAL; goto out; } - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__le32))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } + + ret = skb_ensure_writable(skb, offset + sizeof(__le32)); + if (ret) + goto out; + crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, skb->len - start, ~(__u32)0, crc32c_csum_stub)); @@ -5663,6 +5884,26 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + static void napi_skb_free_stolen_head(struct sk_buff *skb) { skb_dst_drop(skb); @@ -5670,12 +5911,13 @@ static void napi_skb_free_stolen_head(struct sk_buff *skb) kmem_cache_free(skbuff_head_cache, skb); } -static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) +static gro_result_t napi_skb_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) - ret = GRO_DROP; + gro_normal_one(napi, skb); break; case GRO_DROP: @@ -5707,7 +5949,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb_gro_reset_offset(skb); - ret = napi_skb_finish(dev_gro_receive(napi, skb), skb); + ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); trace_napi_gro_receive_exit(ret); return ret; @@ -5753,26 +5995,6 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static void gro_normal_list(struct napi_struct *napi) -{ - if (!napi->rx_count) - return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; -} - -/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, - * pass the whole batch up to the stack. - */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) -{ - list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) - gro_normal_list(napi); -} - static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) @@ -8264,6 +8486,9 @@ static void rollback_registered_many(struct list_head *head) dev_uc_flush(dev); dev_mc_flush(dev); + netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -8706,6 +8931,11 @@ int register_netdevice(struct net_device *dev) if (ret < 0) goto out; + ret = -ENOMEM; + dev->name_node = netdev_name_node_head_alloc(dev); + if (!dev->name_node) + goto out; + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -8827,6 +9057,8 @@ out: return ret; err_uninit: + if (dev->name_node) + netdev_name_node_free(dev->name_node); if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); if (dev->priv_destructor) @@ -9635,6 +9867,8 @@ static int __net_init netdev_init(struct net *net) if (net->dev_index_head == NULL) goto err_idx; + RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); + return 0; err_idx: diff --git a/net/core/devlink.c b/net/core/devlink.c index f80151eeaf51..97e9a2246929 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -95,16 +95,25 @@ static LIST_HEAD(devlink_list); */ static DEFINE_MUTEX(devlink_mutex); -static struct net *devlink_net(const struct devlink *devlink) +struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); } +EXPORT_SYMBOL_GPL(devlink_net); -static void devlink_net_set(struct devlink *devlink, struct net *net) +static void __devlink_net_set(struct devlink *devlink, struct net *net) { write_pnet(&devlink->_net, net); } +void devlink_net_set(struct devlink *devlink, struct net *net) +{ + if (WARN_ON(devlink->registered)) + return; + __devlink_net_set(devlink, net); +} +EXPORT_SYMBOL_GPL(devlink_net_set); + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -434,8 +443,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, { struct devlink *devlink; - devlink = devlink_get_from_info(info); - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + /* When devlink changes netns, it would not be found + * by devlink_get_from_info(). So try if it is stored first. + */ + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { + devlink = info->user_ptr[0]; + } else { + devlink = devlink_get_from_info(info); + WARN_ON(IS_ERR(devlink)); + } + if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -1035,7 +1052,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1058,6 +1075,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -1233,7 +1253,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1256,6 +1276,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -1460,7 +1483,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, struct devlink_sb *devlink_sb; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -1485,6 +1508,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -2674,6 +2700,72 @@ devlink_resources_validate(struct devlink *devlink, return err; } +static struct net *devlink_netns_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; + struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; + struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; + struct net *net; + + if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { + NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); + return ERR_PTR(-EINVAL); + } + + if (netns_pid_attr) { + net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); + } else if (netns_fd_attr) { + net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); + } else if (netns_id_attr) { + net = get_net_ns_by_id(sock_net(skb->sk), + nla_get_u32(netns_id_attr)); + if (!net) + net = ERR_PTR(-EINVAL); + } else { + WARN_ON(1); + net = ERR_PTR(-EINVAL); + } + if (IS_ERR(net)) { + NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); + return ERR_PTR(-EINVAL); + } + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EPERM); + } + return net; +} + +static void devlink_param_notify(struct devlink *devlink, + unsigned int port_index, + struct devlink_param_item *param_item, + enum devlink_command cmd); + +static void devlink_reload_netns_change(struct devlink *devlink, + struct net *dest_net) +{ + struct devlink_param_item *param_item; + + /* Userspace needs to be notified about devlink objects + * removed from original and entering new network namespace. + * The rest of the devlink objects are re-created during + * reload process so the notifications are generated separatelly. + */ + + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + devlink_notify(devlink, DEVLINK_CMD_DEL); + + __devlink_net_set(devlink, dest_net); + + devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); +} + static bool devlink_reload_supported(struct devlink *devlink) { return devlink->ops->reload_down && devlink->ops->reload_up; @@ -2694,9 +2786,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); +static int devlink_reload(struct devlink *devlink, struct net *dest_net, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink->ops->reload_down(devlink, !!dest_net, extack); + if (err) + return err; + + if (dest_net && !net_eq(dest_net, devlink_net(devlink))) + devlink_reload_netns_change(devlink, dest_net); + + err = devlink->ops->reload_up(devlink, extack); + devlink_reload_failed_set(devlink, !!err); + return err; +} + static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct net *dest_net = NULL; int err; if (!devlink_reload_supported(devlink)) @@ -2707,11 +2817,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); return err; } - err = devlink->ops->reload_down(devlink, info->extack); - if (err) - return err; - err = devlink->ops->reload_up(devlink, info->extack); - devlink_reload_failed_set(devlink, !!err); + + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + + err = devlink_reload(devlink, dest_net, info->extack); + + if (dest_net) + put_net(dest_net); + return err; } @@ -3155,7 +3274,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -3183,6 +3302,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -3411,7 +3533,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -3444,6 +3566,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, out: mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -3818,29 +3943,19 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); u64 ret_offset, start_offset, end_offset = 0; + struct nlattr **attrs = info->attrs; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; - struct nlattr **attrs; bool dump = true; void *hdr; int err; start_offset = *((u64 *)&cb->args[0]); - attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return -ENOMEM; - - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, - devlink_nl_family.policy, cb->extack); - if (err) - goto out_free; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) { @@ -3917,7 +4032,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); - kfree(attrs); return skb->len; @@ -3927,8 +4041,6 @@ out_unlock: mutex_unlock(&devlink->lock); out_dev: mutex_unlock(&devlink_mutex); -out_free: - kfree(attrs); return err; } @@ -4066,7 +4178,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { @@ -4094,6 +4206,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, } mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + cb->args[0] = idx; return msg->len; } @@ -4732,14 +4847,17 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); static int devlink_health_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, struct netlink_ext_ack *extack) { int err; + if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) + return 0; + if (!reporter->ops->recover) return -EOPNOTSUPP; - err = reporter->ops->recover(reporter, priv_ctx); + err = reporter->ops->recover(reporter, priv_ctx, extack); if (err) return err; @@ -4760,7 +4878,8 @@ devlink_health_dump_clear(struct devlink_health_reporter *reporter) } static int devlink_health_do_dump(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { int err; @@ -4781,7 +4900,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, goto dump_err; err = reporter->ops->dump(reporter, reporter->dump_fmsg, - priv_ctx); + priv_ctx, extack); if (err) goto dump_err; @@ -4828,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, mutex_lock(&reporter->dump_lock); /* store current dump of current error, for later analysis */ - devlink_health_do_dump(reporter, priv_ctx); + devlink_health_do_dump(reporter, priv_ctx, NULL); mutex_unlock(&reporter->dump_lock); if (reporter->auto_recover) - return devlink_health_reporter_recover(reporter, priv_ctx); + return devlink_health_reporter_recover(reporter, + priv_ctx, NULL); return 0; } @@ -4867,21 +4987,10 @@ devlink_health_reporter_get_from_info(struct devlink *devlink, static struct devlink_health_reporter * devlink_health_reporter_get_from_cb(struct netlink_callback *cb) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct devlink_health_reporter *reporter; + struct nlattr **attrs = info->attrs; struct devlink *devlink; - struct nlattr **attrs; - int err; - - attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); - if (!attrs) - return NULL; - - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, - devlink_nl_family.policy, cb->extack); - if (err) - goto free; mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); @@ -4890,12 +4999,9 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); mutex_unlock(&devlink_mutex); - kfree(attrs); return reporter; unlock: mutex_unlock(&devlink_mutex); -free: - kfree(attrs); return NULL; } @@ -5084,7 +5190,7 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - err = devlink_health_reporter_recover(reporter, NULL); + err = devlink_health_reporter_recover(reporter, NULL, info->extack); devlink_health_reporter_put(reporter); return err; @@ -5117,7 +5223,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (err) goto out; - err = reporter->ops->diagnose(reporter, fmsg); + err = reporter->ops->diagnose(reporter, fmsg, info->extack); if (err) goto out; @@ -5152,7 +5258,7 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, } mutex_lock(&reporter->dump_lock); if (!start) { - err = devlink_health_do_dump(reporter, NULL); + err = devlink_health_do_dump(reporter, NULL, cb->extack); if (err) goto unlock; cb->args[1] = reporter->dump_ts; @@ -5793,6 +5899,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -6023,7 +6132,8 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_REGION_READ, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_region_read_dumpit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -6071,7 +6181,8 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -6155,7 +6266,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) if (!devlink) return NULL; devlink->ops = ops; - devlink_net_set(devlink, &init_net); + __devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); @@ -6181,6 +6292,7 @@ int devlink_register(struct devlink *devlink, struct device *dev) { mutex_lock(&devlink_mutex); devlink->dev = dev; + devlink->registered = true; list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); @@ -8060,9 +8172,43 @@ int devlink_compat_switch_id_get(struct net_device *dev, return 0; } +static void __net_exit devlink_pernet_pre_exit(struct net *net) +{ + struct devlink *devlink; + int err; + + /* In case network namespace is getting destroyed, reload + * all devlink instances from this namespace into init_net. + */ + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (net_eq(devlink_net(devlink), net)) { + if (WARN_ON(!devlink_reload_supported(devlink))) + continue; + err = devlink_reload(devlink, &init_net, NULL); + if (err) + pr_warn("Failed to reload devlink instance into init_net\n"); + } + } + mutex_unlock(&devlink_mutex); +} + +static struct pernet_operations devlink_pernet_ops __net_initdata = { + .pre_exit = devlink_pernet_pre_exit, +}; + static int __init devlink_init(void) { - return genl_register_family(&devlink_nl_family); + int err; + + err = genl_register_family(&devlink_nl_family); + if (err) + goto out; + err = register_pernet_subsys(&devlink_pernet_ops); + +out: + WARN_ON(err); + return err; } subsys_initcall(devlink_init); diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 470a606d5e8d..fc96259807b6 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -12,17 +12,15 @@ static unsigned int fib_notifier_net_id; struct fib_notifier_net { struct list_head fib_notifier_ops; + struct atomic_notifier_head fib_chain; }; -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -int call_fib_notifier(struct notifier_block *nb, struct net *net, +int call_fib_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { int err; - info->net = net; err = nb->notifier_call(nb, event_type, info); return notifier_to_errno(err); } @@ -31,106 +29,100 @@ EXPORT_SYMBOL(call_fib_notifier); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); int err; - info->net = net; - err = atomic_notifier_call_chain(&fib_chain, event_type, info); + err = atomic_notifier_call_chain(&fn_net->fib_chain, event_type, info); return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifiers); -static unsigned int fib_seq_sum(void) +static unsigned int fib_seq_sum(struct net *net) { - struct fib_notifier_net *fn_net; + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; unsigned int fib_seq = 0; - struct net *net; rtnl_lock(); - down_read(&net_rwsem); - for_each_net(net) { - fn_net = net_generic(net, fib_notifier_net_id); - rcu_read_lock(); - list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { - if (!try_module_get(ops->owner)) - continue; - fib_seq += ops->fib_seq_read(net); - module_put(ops->owner); - } - rcu_read_unlock(); + rcu_read_lock(); + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { + if (!try_module_get(ops->owner)) + continue; + fib_seq += ops->fib_seq_read(net); + module_put(ops->owner); } - up_read(&net_rwsem); + rcu_read_unlock(); rtnl_unlock(); return fib_seq; } -static int fib_net_dump(struct net *net, struct notifier_block *nb) +static int fib_net_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; + int err = 0; + rcu_read_lock(); list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { - int err; - if (!try_module_get(ops->owner)) continue; - err = ops->fib_dump(net, nb); + err = ops->fib_dump(net, nb, extack); module_put(ops->owner); if (err) - return err; + goto unlock; } - return 0; +unlock: + rcu_read_unlock(); + + return err; } -static bool fib_dump_is_consistent(struct notifier_block *nb, +static bool fib_dump_is_consistent(struct net *net, struct notifier_block *nb, void (*cb)(struct notifier_block *nb), unsigned int fib_seq) { - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + atomic_notifier_chain_register(&fn_net->fib_chain, nb); + if (fib_seq == fib_seq_sum(net)) return true; - atomic_notifier_chain_unregister(&fib_chain, nb); + atomic_notifier_chain_unregister(&fn_net->fib_chain, nb); if (cb) cb(nb); return false; } #define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) +int register_fib_notifier(struct net *net, struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + struct netlink_ext_ack *extack) { int retries = 0; int err; do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; - - rcu_read_lock(); - for_each_net_rcu(net) { - err = fib_net_dump(net, nb); - if (err) - goto err_fib_net_dump; - } - rcu_read_unlock(); - - if (fib_dump_is_consistent(nb, cb, fib_seq)) + unsigned int fib_seq = fib_seq_sum(net); + + err = fib_net_dump(net, nb, extack); + if (err) + return err; + + if (fib_dump_is_consistent(net, nb, cb, fib_seq)) return 0; } while (++retries < FIB_DUMP_MAX_RETRIES); return -EBUSY; - -err_fib_net_dump: - rcu_read_unlock(); - return err; } EXPORT_SYMBOL(register_fib_notifier); -int unregister_fib_notifier(struct notifier_block *nb) +int unregister_fib_notifier(struct net *net, struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&fib_chain, nb); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + return atomic_notifier_chain_unregister(&fn_net->fib_chain, nb); } EXPORT_SYMBOL(unregister_fib_notifier); @@ -181,6 +173,7 @@ static int __net_init fib_notifier_net_init(struct net *net) struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); INIT_LIST_HEAD(&fn_net->fib_notifier_ops); + ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain); return 0; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index dd220ce7ca7a..3e7e15278c46 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -321,16 +321,18 @@ out: } EXPORT_SYMBOL_GPL(fib_rules_lookup); -static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, +static int call_fib_rule_notifier(struct notifier_block *nb, enum fib_event_type event_type, - struct fib_rule *rule, int family) + struct fib_rule *rule, int family, + struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = family, + .info.extack = extack, .rule = rule, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, @@ -350,20 +352,25 @@ static int call_fib_rule_notifiers(struct net *net, } /* Called with rcu_read_lock() */ -int fib_rules_dump(struct net *net, struct notifier_block *nb, int family) +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, + struct netlink_ext_ack *extack) { struct fib_rules_ops *ops; struct fib_rule *rule; + int err = 0; ops = lookup_rules_ops(net, family); if (!ops) return -EAFNOSUPPORT; - list_for_each_entry_rcu(rule, &ops->rules_list, list) - call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule, - family); + list_for_each_entry_rcu(rule, &ops->rules_list, list) { + err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD, + rule, family, extack); + if (err) + break; + } rules_ops_put(ops); - return 0; + return err; } EXPORT_SYMBOL_GPL(fib_rules_dump); diff --git a/net/core/filter.c b/net/core/filter.c index 3fed5755494b..72b6af6b089e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, * account for the headroom. */ bytes_sg_total = start - offset + bytes; - if (!msg->sg.copy[i] && bytes_sg_total <= len) + if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist @@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - msg->sg.copy[new] = false; + __clear_bit(new, &msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269..dbf502c18656 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -114,19 +114,50 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, { struct bpf_prog *attached; struct net *net; + int ret = 0; net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); + + if (net == &init_net) { + /* BPF flow dissector in the root namespace overrides + * any per-net-namespace one. When attaching to root, + * make sure we don't have any BPF program attached + * to the non-root namespaces. + */ + struct net *ns; + + for_each_net(ns) { + if (ns == &init_net) + continue; + if (rcu_access_pointer(ns->flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + } else { + /* Make sure root flow dissector is not attached + * when attaching to the non-root namespace. + */ + if (rcu_access_pointer(init_net.flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); - if (attached) { - /* Only one BPF program can be attached at a time */ - mutex_unlock(&flow_dissector_mutex); - return -EEXIST; + if (attached == prog) { + /* The same program cannot be attached twice */ + ret = -EINVAL; + goto out; } rcu_assign_pointer(net->flow_dissector_prog, prog); + if (attached) + bpf_prog_put(attached); +out: mutex_unlock(&flow_dissector_mutex); - return 0; + return ret; } int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) @@ -910,7 +941,10 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { rcu_read_lock(); - attached = rcu_dereference(net->flow_dissector_prog); + attached = rcu_dereference(init_net.flow_dissector_prog); + + if (!attached) + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { struct bpf_flow_keys flow_keys; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 36347933ec3a..6bbd06f7dc7d 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -20,8 +20,8 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff struct hlist_head *h; unsigned int count = 0, offset = get_offset(*pos); - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, h, name_hlist) { + h = &net->dev_index_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, h, index_hlist) { if (++count == offset) return dev; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 48b1e429857c..294bfcf0ce0e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3404,7 +3404,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) HARD_TX_LOCK(odev, txq, smp_processor_id()); if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) { - ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee6460f8275..49fa910b58af 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -980,6 +980,19 @@ static size_t rtnl_xdp_size(void) return xdp_size; } +static size_t rtnl_prop_list_size(const struct net_device *dev) +{ + struct netdev_name_node *name_node; + size_t size; + + if (list_empty(&dev->name_node->list)) + return 0; + size = nla_total_size(0); + list_for_each_entry(name_node, &dev->name_node->list, list) + size += nla_total_size(ALTIFNAMSIZ); + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1027,6 +1040,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + + rtnl_prop_list_size(dev) + 0; } @@ -1584,6 +1598,42 @@ static int rtnl_fill_link_af(struct sk_buff *skb, return 0; } +static int rtnl_fill_alt_ifnames(struct sk_buff *skb, + const struct net_device *dev) +{ + struct netdev_name_node *name_node; + int count = 0; + + list_for_each_entry(name_node, &dev->name_node->list, list) { + if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name)) + return -EMSGSIZE; + count++; + } + return count; +} + +static int rtnl_fill_prop_list(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *prop_list; + int ret; + + prop_list = nla_nest_start(skb, IFLA_PROP_LIST); + if (!prop_list) + return -EMSGSIZE; + + ret = rtnl_fill_alt_ifnames(skb, dev); + if (ret <= 0) + goto nest_cancel; + + nla_nest_end(skb, prop_list); + return 0; + +nest_cancel: + nla_nest_cancel(skb, prop_list); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1697,6 +1747,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure_rcu; rcu_read_unlock(); + if (rtnl_fill_prop_list(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -1750,6 +1803,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 }, [IFLA_MIN_MTU] = { .type = NLA_U32 }, [IFLA_MAX_MTU] = { .type = NLA_U32 }, + [IFLA_PROP_LIST] = { .type = NLA_NESTED }, + [IFLA_ALT_IFNAME] = { .type = NLA_STRING, + .len = ALTIFNAMSIZ - 1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2722,6 +2778,26 @@ errout: return err; } +static struct net_device *rtnl_dev_get(struct net *net, + struct nlattr *ifname_attr, + struct nlattr *altifname_attr, + char *ifname) +{ + char buffer[ALTIFNAMSIZ]; + + if (!ifname) { + ifname = buffer; + if (ifname_attr) + nla_strlcpy(ifname, ifname_attr, IFNAMSIZ); + else if (altifname_attr) + nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ); + else + return NULL; + } + + return __dev_get_by_name(net, ifname); +} + static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2750,8 +2826,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname); else goto errout; @@ -2824,7 +2900,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *tgt_net = net; struct net_device *dev = NULL; struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; int err; int netnsid = -1; @@ -2838,9 +2913,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - if (tb[IFLA_TARGET_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid); @@ -2852,8 +2924,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(tgt_net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, tb[IFLA_IFNAME], + tb[IFLA_ALT_IFNAME], NULL); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else @@ -3024,12 +3097,10 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else { - if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; - } + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname); + else + dev = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); @@ -3291,6 +3362,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, switch (i) { case IFLA_IFNAME: + case IFLA_ALT_IFNAME: case IFLA_EXT_MASK: case IFLA_TARGET_NETNSID: break; @@ -3309,7 +3381,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct net *tgt_net = net; struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; @@ -3332,9 +3403,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(tgt_net); } - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -3342,8 +3410,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(tgt_net, ifname); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME], + tb[IFLA_ALT_IFNAME], NULL); else goto out; @@ -3373,6 +3442,100 @@ out: return err; } +static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, + bool *changed, struct netlink_ext_ack *extack) +{ + char *alt_ifname; + int err; + + err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack); + if (err) + return err; + + alt_ifname = nla_data(attr); + if (cmd == RTM_NEWLINKPROP) { + alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + err = netdev_name_node_alt_create(dev, alt_ifname); + if (err) { + kfree(alt_ifname); + return err; + } + } else if (cmd == RTM_DELLINKPROP) { + err = netdev_name_node_alt_destroy(dev, alt_ifname); + if (err) + return err; + } else { + WARN_ON(1); + return 0; + } + + *changed = true; + return 0; +} + +static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + struct ifinfomsg *ifm; + bool changed = false; + struct nlattr *attr; + int err, rem; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + if (err) + return err; + + err = rtnl_ensure_unique_netns(tb, extack, true); + if (err) + return err; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + dev = rtnl_dev_get(net, tb[IFLA_IFNAME], + tb[IFLA_ALT_IFNAME], NULL); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + if (!tb[IFLA_PROP_LIST]) + return 0; + + nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) { + switch (nla_type(attr)) { + case IFLA_ALT_IFNAME: + err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack); + if (err) + return err; + break; + } + } + + if (changed) + netdev_state_change(dev); + return 0; +} + +static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack); +} + +static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); +} + static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -5331,6 +5494,9 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); + rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); diff --git a/net/core/sock.c b/net/core/sock.c index a515392ba84b..5cb567e36f5e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -333,7 +333,6 @@ EXPORT_SYMBOL(__sk_backlog_rcv); static int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; - int size; if (timeo == MAX_SCHEDULE_TIMEOUT) { tv.tv_sec = 0; @@ -354,13 +353,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) old_tv.tv_sec = tv.tv_sec; old_tv.tv_usec = tv.tv_usec; *(struct __kernel_old_timeval *)optval = old_tv; - size = sizeof(old_tv); - } else { - *(struct __kernel_sock_timeval *)optval = tv; - size = sizeof(tv); + return sizeof(old_tv); } - return size; + *(struct __kernel_sock_timeval *)optval = tv; + return sizeof(tv); } static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval) @@ -687,7 +684,8 @@ out: return ret; } -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, + int valbool) { if (valbool) sock_set_flag(sk, bit); @@ -3042,7 +3040,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp, } EXPORT_SYMBOL(sock_gettstamp); -void sock_enable_timestamp(struct sock *sk, int flag) +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) { if (!sock_flag(sk, flag)) { unsigned long previous_flags = sk->sk_flags; diff --git a/net/core/xdp.c b/net/core/xdp.c index d7bf62ffbb5e..20781ad5f9c3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); /* XDP RX runs under NAPI protection, and in different delivery error * scenarios (e.g. queue full), it is possible to return the xdp_frame - * while still leveraging this protection. The @napi_direct boolian + * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index ffcfcef76291..7c5a1aa5adb4 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -236,21 +236,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, struct cfg802154_registered_device **rdev, struct wpan_dev **wpan_dev) { + const struct genl_dumpit_info *info = genl_dumpit_info(cb); int err; rtnl_lock(); if (!cb->args[0]) { - err = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl802154_fam.hdrsize, - genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, - nl802154_policy, NULL); - if (err) - goto out_unlock; - *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), - genl_family_attrbuf(&nl802154_fam)); + info->attrs); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; @@ -557,17 +550,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { - struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl802154_fam.hdrsize, - tb, nl802154_fam.maxattr, - nl802154_policy, NULL); - - /* TODO check if we can handle error here, - * we have no backward compatibility - */ - if (ret) - return 0; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct nlattr **tb = info->attrs; if (tb[NL802154_ATTR_WPAN_PHY]) state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]); @@ -2203,7 +2187,8 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_GET_WPAN_PHY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, @@ -2343,7 +2328,8 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_KEY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, .flags = GENL_ADMIN_PERM, @@ -2369,7 +2355,8 @@ static const struct genl_ops nl802154_ops[] = { /* TODO unique identifier must short+pan OR extended_addr */ { .cmd = NL802154_CMD_GET_SEC_DEV, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, .flags = GENL_ADMIN_PERM, @@ -2395,7 +2382,8 @@ static const struct genl_ops nl802154_ops[] = { /* TODO remove complete devkey, put it as nested? */ { .cmd = NL802154_CMD_GET_SEC_DEVKEY, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, .flags = GENL_ADMIN_PERM, @@ -2420,7 +2408,8 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_LEVEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, .flags = GENL_ADMIN_PERM, diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index b804ccbdb241..0c28bd469a68 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -9,12 +9,12 @@ #include <net/netns/ipv4.h> #include <net/ip_fib.h> -int call_fib4_notifier(struct notifier_block *nb, struct net *net, +int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { info->family = AF_INET; - return call_fib_notifier(nb, net, event_type, info); + return call_fib_notifier(nb, event_type, info); } int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, @@ -34,17 +34,16 @@ static unsigned int fib4_seq_read(struct net *net) return net->ipv4.fib_seq + fib4_rules_seq_read(net); } -static int fib4_dump(struct net *net, struct notifier_block *nb) +static int fib4_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { int err; - err = fib4_rules_dump(net, nb); + err = fib4_rules_dump(net, nb, extack); if (err) return err; - fib_notify(net, nb); - - return 0; + return fib_notify(net, nb, extack); } static const struct fib_notifier_ops fib4_notifier_ops_template = { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b43a7ba5c6a4..f99e3bac5cab 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -65,9 +65,10 @@ bool fib4_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib4_rule_default); -int fib4_rules_dump(struct net *net, struct notifier_block *nb) +int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, AF_INET); + return fib_rules_dump(net, nb, AF_INET, extack); } unsigned int fib4_rules_seq_read(struct net *net) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1ab2fb6bb37d..b9df9c09b84e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -74,11 +74,13 @@ #include <trace/events/fib.h> #include "fib_lookup.h" -static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, +static int call_fib_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_alias *fa) + int dst_len, struct fib_alias *fa, + struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { + .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, @@ -86,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .type = fa->fa_type, .tb_id = fa->tb_id, }; - return call_fib4_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, @@ -2015,10 +2017,12 @@ void fib_info_notify_update(struct net *net, struct nl_info *info) } } -static void fib_leaf_notify(struct net *net, struct key_vector *l, - struct fib_table *tb, struct notifier_block *nb) +static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, + struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib_alias *fa; + int err; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; @@ -2032,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, if (tb->tb_id != fa->tb_id) continue; - call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, fa); + err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key, + KEYLENGTH - fa->fa_slen, + fa, extack); + if (err) + return err; } + return 0; } -static void fib_table_notify(struct net *net, struct fib_table *tb, - struct notifier_block *nb) +static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; + int err; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - fib_leaf_notify(net, l, tb, nb); + err = fib_leaf_notify(l, tb, nb, extack); + if (err) + return err; key = l->key + 1; /* stop in case of wrap around */ if (key < l->key) break; } + return 0; } -void fib_notify(struct net *net, struct notifier_block *nb) +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { unsigned int h; + int err; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, head, tb_hlist) - fib_table_notify(net, tb, nb); + hlist_for_each_entry_rcu(tb, head, tb_hlist) { + err = fib_table_notify(tb, nb, extack); + if (err) + return err; + } } + return 0; } static void __trie_free_rcu(struct rcu_head *head) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 480d0b22db1a..3b9c7a2725a9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1563,7 +1563,7 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb) } } -static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +static __sum16 ip_mc_validate_checksum(struct sk_buff *skb) { return skb_checksum_simple_validate(skb); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 9bcca08efec9..32e20b758b68 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1483,10 +1483,10 @@ static int __init ip_auto_config(void) * missing values. */ if (ic_myaddr == NONE || -#ifdef CONFIG_ROOT_NFS +#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_CIFS_ROOT) (root_server_addr == NONE && ic_servaddr == NONE && - ROOT_DEV == Root_NFS) || + (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC @@ -1513,6 +1513,12 @@ static int __init ip_auto_config(void) goto try_try_again; } #endif +#ifdef CONFIG_CIFS_ROOT + if (ROOT_DEV == Root_CIFS) { + pr_err("IP-Config: Retrying forever (CIFS root)...\n"); + goto try_try_again; + } +#endif if (--retries) { pr_err("IP-Config: Reopening network devices...\n"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 716d5472c022..440294bdb752 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -278,9 +278,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) rtnl_unlock(); } -static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); } static unsigned int ipmr_rules_seq_read(struct net *net) @@ -336,7 +337,8 @@ static void __net_exit ipmr_rules_exit(struct net *net) rtnl_unlock(); } -static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -3040,10 +3042,11 @@ static unsigned int ipmr_seq_read(struct net *net) return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); } -static int ipmr_dump(struct net *net, struct notifier_block *nb) +static int ipmr_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, - ipmr_mr_table_iter, &mrt_lock); + ipmr_mr_table_iter, &mrt_lock, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index ea48bd15a575..aa8738a91210 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -386,15 +386,17 @@ EXPORT_SYMBOL(mr_rtm_dumproute); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock) + rwlock_t *mrt_lock, + struct netlink_ext_ack *extack) { struct mr_table *mrt; int err; - err = rules_dump(net, nb); + err = rules_dump(net, nb, extack); if (err) return err; @@ -409,17 +411,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, if (!v->dev) continue; - mr_call_vif_notifier(nb, net, family, - FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); + err = mr_call_vif_notifier(nb, family, + FIB_EVENT_VIF_ADD, + v, vifi, mrt->id, extack); + if (err) + break; } read_unlock(mrt_lock); + if (err) + return err; + /* Notify on table MFC entries */ - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) - mr_call_mfc_notifier(nb, net, family, - FIB_EVENT_ENTRY_ADD, - mfc, mrt->id); + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + err = mr_call_mfc_notifier(nb, family, + FIB_EVENT_ENTRY_ADD, + mfc, mrt->id, extack); + if (err) + return err; + } } return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 42187a3b82f4..9d69e1da93f2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1741,8 +1741,8 @@ static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; + u32 length = 0, seq, offset, zap_len; const skb_frag_t *frags = NULL; - u32 length = 0, seq, offset; struct vm_area_struct *vma; struct sk_buff *skb = NULL; struct tcp_sock *tp; @@ -1769,12 +1769,12 @@ static int tcp_zerocopy_receive(struct sock *sk, seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); - zc->length &= ~(PAGE_SIZE - 1); - if (zc->length) { - zap_page_range(vma, address, zc->length); + zap_len = zc->length & ~(PAGE_SIZE - 1); + if (zap_len) { + zap_page_range(vma, address, zap_len); zc->recv_skip_hint = 0; } else { - zc->recv_skip_hint = inq; + zc->recv_skip_hint = zc->length; } ret = 0; while (length + PAGE_SIZE <= zc->length) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6be568334848..c616f0ad1fa0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -121,11 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { if (ipv6_addr_loopback(&tw->tw_v6_daddr) || - (ipv6_addr_v4mapped(&tw->tw_v6_daddr) && - (tw->tw_v6_daddr.s6_addr[12] == 127)) || + ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) || ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) || - (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) && - (tw->tw_v6_rcv_saddr.s6_addr[12] == 127))) + ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr)) loopback = true; } else #endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 34ccef18b40e..98d82305d6de 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5552,14 +5552,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (!nla) goto nla_put_failure; - - if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) - goto nla_put_failure; - read_lock_bh(&idev->lock); memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index 05f82baaa99e..f87ae33e1d01 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -7,12 +7,12 @@ #include <net/netns/ipv6.h> #include <net/ip6_fib.h> -int call_fib6_notifier(struct notifier_block *nb, struct net *net, +int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info) { info->family = AF_INET6; - return call_fib_notifier(nb, net, event_type, info); + return call_fib_notifier(nb, event_type, info); } int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, @@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net) return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); } -static int fib6_dump(struct net *net, struct notifier_block *nb) +static int fib6_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { int err; - err = fib6_rules_dump(net, nb); + err = fib6_rules_dump(net, nb, extack); if (err) return err; - return fib6_tables_dump(net, nb); + return fib6_tables_dump(net, nb, extack); } static const struct fib_notifier_ops fib6_notifier_ops_template = { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f9e8fe3ff0c5..fafe556d21e0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib6_rule_default); -int fib6_rules_dump(struct net *net, struct notifier_block *nb) +int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, AF_INET6); + return fib_rules_dump(net, nb, AF_INET6, extack); } unsigned int fib6_rules_seq_read(struct net *net) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6e2af411cd9c..f66bc2af4e9d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -357,15 +357,17 @@ unsigned int fib6_tables_seq_read(struct net *net) return fib_seq; } -static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, +static int call_fib6_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, - struct fib6_info *rt) + struct fib6_info *rt, + struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { + .info.extack = extack, .rt = rt, }; - return call_fib6_notifier(nb, net, event_type, &info.info); + return call_fib6_notifier(nb, event_type, &info.info); } int call_fib6_entry_notifiers(struct net *net, @@ -401,40 +403,51 @@ int call_fib6_multipath_entry_notifiers(struct net *net, struct fib6_dump_arg { struct net *net; struct notifier_block *nb; + struct netlink_ext_ack *extack; }; -static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) +static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { if (rt == arg->net->ipv6.fib6_null_entry) - return; - call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); + return 0; + return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD, + rt, arg->extack); } static int fib6_node_dump(struct fib6_walker *w) { struct fib6_info *rt; + int err = 0; - for_each_fib6_walker_rt(w) - fib6_rt_dump(rt, w->args); + for_each_fib6_walker_rt(w) { + err = fib6_rt_dump(rt, w->args); + if (err) + break; + } w->leaf = NULL; - return 0; + return err; } -static void fib6_table_dump(struct net *net, struct fib6_table *tb, - struct fib6_walker *w) +static int fib6_table_dump(struct net *net, struct fib6_table *tb, + struct fib6_walker *w) { + int err; + w->root = &tb->tb6_root; spin_lock_bh(&tb->tb6_lock); - fib6_walk(net, w); + err = fib6_walk(net, w); spin_unlock_bh(&tb->tb6_lock); + return err; } /* Called with rcu_read_lock() */ -int fib6_tables_dump(struct net *net, struct notifier_block *nb) +int fib6_tables_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib6_dump_arg arg; struct fib6_walker *w; unsigned int h; + int err = 0; w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) @@ -443,19 +456,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) w->func = fib6_node_dump; arg.net = net; arg.nb = nb; + arg.extack = extack; w->args = &arg; for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv6.fib_table_hash[h]; struct fib6_table *tb; - hlist_for_each_entry_rcu(tb, head, tb6_hlist) - fib6_table_dump(net, tb, w); + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { + err = fib6_table_dump(net, tb, w); + if (err < 0) + goto out; + } } +out: kfree(w); - return 0; + return err; } static int fib6_dump_node(struct fib6_walker *w) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 857a89ad4d6c..bfa49ff70531 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) rtnl_unlock(); } -static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR); + return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); } static unsigned int ip6mr_rules_seq_read(struct net *net) @@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net) rtnl_unlock(); } -static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net) return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); } -static int ip6mr_dump(struct net *net, struct notifier_block *nb) +static int ip6mr_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, - ip6mr_mr_table_iter, &mrt_lock); + ip6mr_mr_table_iter, &mrt_lock, extack); } static struct notifier_block ip6_mr_notifier = { diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b11883d26875..33da6f738c99 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -485,7 +485,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); - if (ret) { + if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { + /* + * We didn't send the request yet, so don't need to check + * here if we already got a response, just mark as driver + * ready immediately. + */ + set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); + } else if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0a6ff01c68a9..d40744903fa9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -538,7 +538,6 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; - int err, changed = 0; sdata_assert_lock(sdata); @@ -560,13 +559,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) ifibss->chandef = sdata->csa_chandef; /* generate the beacon */ - err = ieee80211_ibss_csa_beacon(sdata, NULL); - if (err < 0) - return err; - - changed |= err; - - return changed; + return ieee80211_ibss_csa_beacon(sdata, NULL); } void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ee86c3333999..86bc469a28bc 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -70,7 +70,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) } /* return current EMWA throughput */ -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg) { int usecs; @@ -79,13 +79,13 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) usecs = 1000000; /* reset thr. below 10% success */ - if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100)) + if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100)) return 0; - if (prob_ewma > MINSTREL_FRAC(90, 100)) + if (prob_avg > MINSTREL_FRAC(90, 100)) return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs)); else - return MINSTREL_TRUNC(100000 * (prob_ewma / usecs)); + return MINSTREL_TRUNC(100000 * (prob_avg / usecs)); } /* find & sort topmost throughput rates */ @@ -98,8 +98,8 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; - if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg)) break; } @@ -157,20 +157,24 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * Recalculate statistics and counters of a given rate */ void -minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs) +minstrel_calc_rate_stats(struct minstrel_priv *mp, + struct minstrel_rate_stats *mrs) { unsigned int cur_prob; if (unlikely(mrs->attempts > 0)) { mrs->sample_skipped = 0; cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); - if (unlikely(!mrs->att_hist)) { - mrs->prob_ewma = cur_prob; + if (mp->new_avg) { + minstrel_filter_avg_add(&mrs->prob_avg, + &mrs->prob_avg_1, cur_prob); + } else if (unlikely(!mrs->att_hist)) { + mrs->prob_avg = cur_prob; } else { /*update exponential weighted moving avarage */ - mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma, - cur_prob, - EWMA_LEVEL); + mrs->prob_avg = minstrel_ewma(mrs->prob_avg, + cur_prob, + EWMA_LEVEL); } mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; @@ -200,12 +204,12 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats; /* Update statistics of success probability per rate */ - minstrel_calc_rate_stats(mrs); + minstrel_calc_rate_stats(mp, mrs); /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || - mrs->prob_ewma < MINSTREL_FRAC(10, 100)) { + if (mrs->prob_avg > MINSTREL_FRAC(95, 100) || + mrs->prob_avg < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mrs->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -225,14 +229,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is chosen as max_prob_rate */ - if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) { - tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma); + if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) { + tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg); tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate], - tmp_mrs->prob_ewma); + tmp_mrs->prob_avg); if (tmp_cur_tp >= tmp_prob_tp) tmp_prob_rate = i; } else { - if (mrs->prob_ewma >= tmp_mrs->prob_ewma) + if (mrs->prob_avg >= tmp_mrs->prob_avg) tmp_prob_rate = i; } } @@ -290,7 +294,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->sample_deferred--; if (time_after(jiffies, mi->last_stats_update + - (mp->update_interval * HZ) / 1000)) + mp->update_interval / (mp->new_avg ? 2 : 1))) minstrel_update_stats(mp, mi); } @@ -422,7 +426,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ if (!mrr_capable && - (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100))) + (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100))) return; mi->prev_sample = true; @@ -573,7 +577,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta) * computing cur_tp */ tmp_mrs = &mi->r[idx].stats; - tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10; + tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10; tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; return tmp_cur_tp; diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 51d8b2c846e7..dbb43bcd3c45 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -19,6 +19,21 @@ #define MAX_THR_RATES 4 /* + * Coefficients for moving average with noise filter (period=16), + * scaled by 10 bits + * + * a1 = exp(-pi * sqrt(2) / period) + * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period) + * coeff3 = -sqr(a1) + * coeff1 = 1 - coeff2 - coeff3 + */ +#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \ + MINSTREL_AVG_COEFF2 - \ + MINSTREL_AVG_COEFF3) +#define MINSTREL_AVG_COEFF2 0x00001499 +#define MINSTREL_AVG_COEFF3 -0x0000092e + +/* * Perform EWMA (Exponentially Weighted Moving Average) calculation */ static inline int @@ -32,6 +47,37 @@ minstrel_ewma(int old, int new, int weight) return old + incr; } +static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) +{ + s32 out_1 = *prev_1; + s32 out_2 = *prev_2; + s32 val; + + if (!in) + in += 1; + + if (!out_1) { + val = out_1 = in; + goto out; + } + + val = MINSTREL_AVG_COEFF1 * in; + val += MINSTREL_AVG_COEFF2 * out_1; + val += MINSTREL_AVG_COEFF3 * out_2; + val >>= MINSTREL_SCALE; + + if (val > 1 << MINSTREL_SCALE) + val = 1 << MINSTREL_SCALE; + if (val < 0) + val = 1; + +out: + *prev_2 = out_1; + *prev_1 = val; + + return val; +} + struct minstrel_rate_stats { /* current / last sampling period attempts/success counters */ u16 attempts, last_attempts; @@ -40,8 +86,9 @@ struct minstrel_rate_stats { /* total attempts/success counters */ u32 att_hist, succ_hist; - /* prob_ewma - exponential weighted moving average of prob */ - u16 prob_ewma; + /* prob_avg - moving average of prob */ + u16 prob_avg; + u16 prob_avg_1; /* maximum retry counts */ u8 retry_count; @@ -95,6 +142,7 @@ struct minstrel_sta_info { struct minstrel_priv { struct ieee80211_hw *hw; bool has_mrr; + bool new_avg; u32 sample_switch; unsigned int cw_min; unsigned int cw_max; @@ -126,8 +174,9 @@ extern const struct rate_control_ops mac80211_minstrel; void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); /* Recalculate success probabilities and counters for a given rate using EWMA */ -void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs); -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma); +void minstrel_calc_rate_stats(struct minstrel_priv *mp, + struct minstrel_rate_stats *mrs); +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg); /* debugfs */ int minstrel_stats_open(struct inode *inode, struct file *file); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index c8afd85b51a0..9b8e0daeb7bb 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -90,8 +90,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) p += sprintf(p, "%6u ", mr->perfect_tx_time); tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" " %3u %3u %-3u " @@ -147,8 +147,8 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file) p += sprintf(p, "%u,",mr->perfect_tx_time); tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u," "%llu,%llu,%d,%d\n", diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 0ef2633349b5..694a31978a04 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -346,12 +346,12 @@ minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) */ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, - int prob_ewma) + int prob_avg) { unsigned int nsecs = 0; /* do not account throughput if sucess prob is below 10% */ - if (prob_ewma < MINSTREL_FRAC(10, 100)) + if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; if (group != MINSTREL_CCK_GROUP) @@ -365,11 +365,11 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, * account for collision related packet error rate fluctuation * (prob is scaled - see MINSTREL_FRAC above) */ - if (prob_ewma > MINSTREL_FRAC(90, 100)) + if (prob_avg > MINSTREL_FRAC(90, 100)) return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000) / nsecs)); else - return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs)); + return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs)); } /* @@ -389,13 +389,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, cur_group = index / MCS_GROUP_RATES; cur_idx = index % MCS_GROUP_RATES; - cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma; + cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg; cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (cur_tp_avg < tmp_tp_avg || @@ -432,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) tmp_group = mi->max_prob_rate / MCS_GROUP_RATES; tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from @@ -444,11 +444,11 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; - max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma; + max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; - if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) { + if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) { cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, - mrs->prob_ewma); + mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) mi->max_prob_rate = index; @@ -458,9 +458,9 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { - if (mrs->prob_ewma > tmp_prob) + if (mrs->prob_avg > tmp_prob) mi->max_prob_rate = index; - if (mrs->prob_ewma > max_gpr_prob) + if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } } @@ -482,12 +482,12 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) { @@ -518,7 +518,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) continue; tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; - tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma; + tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg; if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { @@ -623,7 +623,7 @@ minstrel_ht_rate_sample_switch(struct minstrel_priv *mp, * If that fails, look again for a rate that is at least as fast */ mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); - faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100); + faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100); minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate); if (!n_rates && faster_rate) minstrel_ht_find_probe_rates(mi, rates, &n_rates, false); @@ -737,8 +737,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mrs = &mg->rates[i]; mrs->retry_updated = false; - minstrel_calc_rate_stats(mrs); - cur_prob = mrs->prob_ewma; + minstrel_calc_rate_stats(mp, mrs); + cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; @@ -773,6 +773,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, /* try to sample all available rates during each interval */ mi->sample_count *= 8; + if (mp->new_avg) + mi->sample_count /= 2; if (sample) minstrel_ht_rate_sample_switch(mp, mi); @@ -889,6 +891,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; + u32 update_interval = mp->update_interval / 2; bool last, update = false; bool sample_status = false; int i; @@ -943,6 +946,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, switch (mi->sample_mode) { case MINSTREL_SAMPLE_IDLE: + if (mp->new_avg && + (mp->hw->max_rates > 1 || + mi->total_packets_cur < SAMPLE_SWITCH_THR)) + update_interval /= 2; break; case MINSTREL_SAMPLE_ACTIVE: @@ -970,23 +977,20 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, */ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); if (rate->attempts > 30 && - MINSTREL_FRAC(rate->success, rate->attempts) < - MINSTREL_FRAC(20, 100)) { + rate->success < rate->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); update = true; } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); if (rate2->attempts > 30 && - MINSTREL_FRAC(rate2->success, rate2->attempts) < - MINSTREL_FRAC(20, 100)) { + rate2->success < rate2->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; } } - if (time_after(jiffies, mi->last_stats_update + - (mp->update_interval / 2 * HZ) / 1000)) { + if (time_after(jiffies, mi->last_stats_update + update_interval)) { update = true; minstrel_ht_update_stats(mp, mi, true); } @@ -1008,7 +1012,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); - if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) { + if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) { mrs->retry_count = 1; mrs->retry_count_rtscts = 1; return; @@ -1065,7 +1069,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { + if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; @@ -1099,11 +1103,11 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, } static inline int -minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate) +minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate) { int group = rate / MCS_GROUP_RATES; rate %= MCS_GROUP_RATES; - return mi->groups[group].rates[rate].prob_ewma; + return mi->groups[group].rates[rate].prob_avg; } static int @@ -1115,7 +1119,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) unsigned int duration; /* Disable A-MSDU if max_prob_rate is bad */ - if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100)) + if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100)) return 1; duration = g->duration[rate]; @@ -1138,7 +1142,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) * data packet size */ if (duration > MCS_DURATION(1, 0, 260) || - (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) < + (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) < MINSTREL_FRAC(75, 100))) return 3200; @@ -1243,7 +1247,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * rate, to avoid wasting airtime. */ sample_dur = minstrel_get_duration(sample_idx); - if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || + if (mrs->prob_avg > MINSTREL_FRAC(95, 100) || minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur) return -1; @@ -1666,7 +1670,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->has_mrr = true; mp->hw = hw; - mp->update_interval = 100; + mp->update_interval = HZ / 10; + mp->new_avg = true; #ifdef CONFIG_MAC80211_DEBUGFS mp->fixed_rate_idx = (u32) -1; @@ -1674,6 +1679,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) &mp->fixed_rate_idx); debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir, &mp->sample_switch); + debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir, + &mp->new_avg); #endif minstrel_ht_init_cck_rates(mp); @@ -1698,7 +1705,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) i = mi->max_tp_rate[0] / MCS_GROUP_RATES; j = mi->max_tp_rate[0] % MCS_GROUP_RATES; - prob = mi->groups[i].rates[j].prob_ewma; + prob = mi->groups[i].rates[j].prob_avg; /* convert tp_avg from pkt per second in kbps */ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index f938701e7ab7..53ea3c29debf 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -119,6 +119,6 @@ struct minstrel_ht_sta_priv { void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, - int prob_ewma); + int prob_avg); #endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 5a6e9f3edc04..bebb71917742 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -98,8 +98,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "%6u ", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); - tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" " %3u %3u %-3u " @@ -243,8 +243,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "%u,", tx_time); tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); - tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); - eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg); + eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u," "%u,%llu,%llu,", diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1fa422782905..938c10f7955b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1617,7 +1617,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct sta_info *sta, struct sk_buff_head *skbs, bool txpending) { @@ -1679,7 +1679,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - control.sta = sta; + control.sta = sta ? &sta->sta : NULL; __skb_unlink(skb, skbs); drv_tx(local, &control, skb); @@ -1698,7 +1698,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; struct ieee80211_vif *vif; - struct ieee80211_sta *pubsta; struct sk_buff *skb; bool result = true; __le16 fc; @@ -1713,11 +1712,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local, if (sta && !sta->uploaded) sta = NULL; - if (sta) - pubsta = &sta->sta; - else - pubsta = NULL; - switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { @@ -1744,8 +1738,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } - result = ieee80211_tx_frags(local, vif, pubsta, skbs, - txpending); + result = ieee80211_tx_frags(local, vif, sta, skbs, txpending); ieee80211_tpt_led_trig_tx(local, fc, led_len); @@ -3529,7 +3522,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data, u.ap); __skb_queue_tail(&tx.skbs, skb); - ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false); + ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false); return true; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index efccd1ac9a66..0522b2b1fd95 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -458,10 +458,63 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static struct genl_dumpit_info *genl_dumpit_info_alloc(void) +{ + return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL); +} + +static void genl_dumpit_info_free(const struct genl_dumpit_info *info) +{ + kfree(info); +} + +static struct nlattr ** +genl_family_rcv_msg_attrs_parse(const struct genl_family *family, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, + enum genl_validate_flags no_strict_flag, + bool parallel) +{ + enum netlink_validation validate = ops->validate & no_strict_flag ? + NL_VALIDATE_LIBERAL : + NL_VALIDATE_STRICT; + struct nlattr **attrbuf; + int err; + + if (!family->maxattr) + return NULL; + + if (parallel) { + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); + } else { + attrbuf = family->attrbuf; + } + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); + if (err && parallel) { + kfree(attrbuf); + return ERR_PTR(err); + } + return attrbuf; +} + +static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, + struct nlattr **attrbuf, + bool parallel) +{ + if (parallel) + kfree(attrbuf); +} + static int genl_lock_start(struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_ops *ops = genl_dumpit_info(cb)->ops; int rc = 0; if (ops->start) { @@ -474,8 +527,7 @@ static int genl_lock_start(struct netlink_callback *cb) static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_ops *ops = genl_dumpit_info(cb)->ops; int rc; genl_lock(); @@ -486,8 +538,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) static int genl_lock_done(struct netlink_callback *cb) { - /* our ops are always const - netlink API doesn't propagate that */ - const struct genl_ops *ops = cb->data; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + const struct genl_ops *ops = info->ops; int rc = 0; if (ops->done) { @@ -495,120 +547,111 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } + genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_dumpit_info_free(info); return rc; } -static int genl_family_rcv_msg(const struct genl_family *family, - struct sk_buff *skb, - struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int genl_parallel_done(struct netlink_callback *cb) { - const struct genl_ops *ops; - struct net *net = sock_net(skb->sk); - struct genl_info info; - struct genlmsghdr *hdr = nlmsg_data(nlh); - struct nlattr **attrbuf; - int hdrlen, err; + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + const struct genl_ops *ops = info->ops; + int rc = 0; - /* this family doesn't exist in this netns */ - if (!family->netnsok && !net_eq(net, &init_net)) - return -ENOENT; + if (ops->done) + rc = ops->done(cb); + genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_dumpit_info_free(info); + return rc; +} - hdrlen = GENL_HDRLEN + family->hdrsize; - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) - return -EINVAL; +static int genl_family_rcv_msg_dumpit(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, struct net *net) +{ + struct genl_dumpit_info *info; + struct nlattr **attrs = NULL; + int err; - ops = genl_get_cmd(hdr->cmd, family); - if (ops == NULL) + if (!ops->dumpit) return -EOPNOTSUPP; - if ((ops->flags & GENL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if ((ops->flags & GENL_UNS_ADMIN_PERM) && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { - int rc; - - if (ops->dumpit == NULL) - return -EOPNOTSUPP; - - if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { - int hdrlen = GENL_HDRLEN + family->hdrsize; - - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) - return -EINVAL; + if (ops->validate & GENL_DONT_VALIDATE_DUMP) + goto no_attrs; - if (family->maxattr) { - unsigned int validate = NL_VALIDATE_STRICT; - - if (ops->validate & - GENL_DONT_VALIDATE_DUMP_STRICT) - validate = NL_VALIDATE_LIBERAL; - rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), - family->maxattr, - family->policy, - validate, extack); - if (rc) - return rc; - } - } + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; - if (!family->parallel_ops) { - struct netlink_dump_control c = { - .module = family->module, - /* we have const, but the netlink API doesn't */ - .data = (void *)ops, - .start = genl_lock_start, - .dump = genl_lock_dumpit, - .done = genl_lock_done, - }; + attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack, + ops, hdrlen, + GENL_DONT_VALIDATE_DUMP_STRICT, + true); + if (IS_ERR(attrs)) + return PTR_ERR(attrs); + +no_attrs: + /* Allocate dumpit info. It is going to be freed by done() callback. */ + info = genl_dumpit_info_alloc(); + if (!info) { + genl_family_rcv_msg_attrs_free(family, attrs, true); + return -ENOMEM; + } - genl_unlock(); - rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); - genl_lock(); + info->family = family; + info->ops = ops; + info->attrs = attrs; - } else { - struct netlink_dump_control c = { - .module = family->module, - .start = ops->start, - .dump = ops->dumpit, - .done = ops->done, - }; + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .module = family->module, + .data = info, + .start = genl_lock_start, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; - rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); - } + genl_unlock(); + err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); - return rc; + } else { + struct netlink_dump_control c = { + .module = family->module, + .data = info, + .start = ops->start, + .dump = ops->dumpit, + .done = genl_parallel_done, + }; + + err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } - if (ops->doit == NULL) - return -EOPNOTSUPP; - - if (family->maxattr && family->parallel_ops) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (attrbuf == NULL) - return -ENOMEM; - } else - attrbuf = family->attrbuf; + return err; +} - if (attrbuf) { - enum netlink_validation validate = NL_VALIDATE_STRICT; +static int genl_family_rcv_msg_doit(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + const struct genl_ops *ops, + int hdrlen, struct net *net) +{ + struct nlattr **attrbuf; + struct genl_info info; + int err; - if (ops->validate & GENL_DONT_VALIDATE_STRICT) - validate = NL_VALIDATE_LIBERAL; + if (!ops->doit) + return -EOPNOTSUPP; - err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - family->policy, validate, extack); - if (err < 0) - goto out; - } + attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack, + ops, hdrlen, + GENL_DONT_VALIDATE_STRICT, + family->parallel_ops); + if (IS_ERR(attrbuf)) + return PTR_ERR(attrbuf); info.snd_seq = nlh->nlmsg_seq; info.snd_portid = NETLINK_CB(skb).portid; @@ -632,12 +675,49 @@ static int genl_family_rcv_msg(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - if (family->parallel_ops) - kfree(attrbuf); + genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops); return err; } +static int genl_family_rcv_msg(const struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + const struct genl_ops *ops; + struct net *net = sock_net(skb->sk); + struct genlmsghdr *hdr = nlmsg_data(nlh); + int hdrlen; + + /* this family doesn't exist in this netns */ + if (!family->netnsok && !net_eq(net, &init_net)) + return -ENOENT; + + hdrlen = GENL_HDRLEN + family->hdrsize; + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + ops = genl_get_cmd(hdr->cmd, family); + if (ops == NULL) + return -EOPNOTSUPP; + + if ((ops->flags & GENL_ADMIN_PERM) && + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + if ((ops->flags & GENL_UNS_ADMIN_PERM) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) + return genl_family_rcv_msg_dumpit(family, skb, nlh, extack, + ops, hdrlen, net); + else + return genl_family_rcv_msg_doit(family, skb, nlh, extack, + ops, hdrlen, net); +} + static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -1088,25 +1168,6 @@ problem: subsys_initcall(genl_init); -/** - * genl_family_attrbuf - return family's attrbuf - * @family: the family - * - * Return the family's attrbuf, while validating that it's - * actually valid to access it. - * - * You cannot use this function with a family that has parallel_ops - * and you can only use it within (pre/post) doit/dumpit callbacks. - */ -struct nlattr **genl_family_attrbuf(const struct genl_family *family) -{ - if (!WARN_ON(family->parallel_ops)) - lockdep_assert_held(&genl_mutex); - - return family->attrbuf; -} -EXPORT_SYMBOL(genl_family_attrbuf); - static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 17e6ca62f1be..fd9ad534dd9b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,22 +102,14 @@ nla_put_failure: static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { - struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family); + const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct nfc_dev *dev; - int rc; u32 idx; - rc = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, - nfc_genl_policy, NULL); - if (rc < 0) - return ERR_PTR(rc); - - if (!attrbuf[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); - idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]); + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) @@ -1697,7 +1689,8 @@ static const struct genl_ops nfc_genl_ops[] = { }, { .cmd = NFC_CMD_GET_TARGET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 05249eb45082..df9c80bf621d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ct = nf_ct_get(skb, &ctinfo); if (ct) { + bool add_helper = false; + /* Packets starting a new connection must be NATted before the * helper, so that the helper knows about the NAT. We enforce * this by delaying both NAT and helper calls for unconfirmed @@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, } /* Userspace may decide to perform a ct lookup without a helper - * specified followed by a (recirculate and) commit with one. - * Therefore, for unconfirmed connections which we will commit, - * we need to attach the helper here. + * specified followed by a (recirculate and) commit with one, + * or attach a helper in a later commit. Therefore, for + * connections which we will commit, we may need to attach + * the helper here. */ - if (!nf_ct_is_confirmed(ct) && info->commit && - info->helper && !nfct_help(ct)) { + if (info->commit && info->helper && !nfct_help(ct)) { int err = __nf_ct_try_assign_helper(ct, info->ct, GFP_ATOMIC); if (err) return err; + add_helper = true; /* helper installed, add seqadj if NAT is required */ if (info->nat && !nfct_seqadj(ct)) { @@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, } /* Call the helper only if: - * - nf_conntrack_in() was executed above ("!cached") for a - * confirmed connection, or + * - nf_conntrack_in() was executed above ("!cached") or a + * helper was just attached ("add_helper") for a confirmed + * connection, or * - When committing an unconfirmed connection. */ - if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) && + if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : + info->commit) && ovs_ct_helper(skb, info->family) != NF_ACCEPT) { return -EINVAL; } diff --git a/net/rds/ib.c b/net/rds/ib.c index 9de2ae22d583..3fd5f40189bd 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> @@ -107,6 +108,7 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); + dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -182,6 +184,12 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } + rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, + device->dma_device, + sizeof(struct rds_header), + L1_CACHE_BYTES, 0); + if (!rds_ibdev->rid_hdrs_pool) + goto put_dev; rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index f2b558e8b5ea..6e6f24753998 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -165,8 +165,8 @@ struct rds_ib_connection { /* tx */ struct rds_ib_work_ring i_send_ring; struct rm_data_op *i_data_op; - struct rds_header *i_send_hdrs; - dma_addr_t i_send_hdrs_dma; + struct rds_header **i_send_hdrs; + dma_addr_t *i_send_hdrs_dma; struct rds_ib_send_work *i_sends; atomic_t i_signaled_sends; @@ -175,8 +175,8 @@ struct rds_ib_connection { struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; u32 i_recv_data_rem; - struct rds_header *i_recv_hdrs; - dma_addr_t i_recv_hdrs_dma; + struct rds_header **i_recv_hdrs; + dma_addr_t *i_recv_hdrs_dma; struct rds_ib_recv_work *i_recvs; u64 i_ack_recv; /* last ACK received */ struct rds_ib_refill_cache i_cache_incs; @@ -246,6 +246,7 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; + struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ bool use_fastreg; unsigned int max_mrs; @@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); - +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, + struct dma_pool *pool, + dma_addr_t **dma_addrs, u32 num_hdrs); +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, + dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 233f1368162b..6b345c858dba 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/slab.h> @@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +/* Allocate DMA coherent memory to be used to store struct rds_header for + * sending/receiving packets. The pointers to the DMA memory and the + * associated DMA addresses are stored in two arrays. + * + * @ibdev: the IB device + * @pool: the DMA memory pool + * @dma_addrs: pointer to the array for storing DMA addresses + * @num_hdrs: number of headers to allocate + * + * It returns the pointer to the array storing the DMA memory pointers. On + * error, NULL pointer is returned. + */ +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, + struct dma_pool *pool, + dma_addr_t **dma_addrs, u32 num_hdrs) +{ + struct rds_header **hdrs; + dma_addr_t *hdr_daddrs; + u32 i; + + hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, + ibdev_to_node(ibdev)); + if (!hdrs) + return NULL; + + hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, + ibdev_to_node(ibdev)); + if (!hdr_daddrs) { + kvfree(hdrs); + return NULL; + } + + for (i = 0; i < num_hdrs; i++) { + hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + if (!hdrs[i]) { + rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + return NULL; + } + } + + *dma_addrs = hdr_daddrs; + return hdrs; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @pool: the DMA memory pool + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, + dma_addr_t *dma_addrs, u32 num_hdrs) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + dma_pool_free(pool, hdrs[i], dma_addrs[i]); + kvfree(hdrs); + kvfree(dma_addrs); +} + /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. @@ -451,6 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; int ret, fr_queue_space; + struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -541,31 +605,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - ic->i_send_hdrs = ib_dma_alloc_coherent(dev, - ic->i_send_ring.w_nr * - sizeof(struct rds_header), - &ic->i_send_hdrs_dma, GFP_KERNEL); + pool = rds_ibdev->rid_hdrs_pool; + ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); if (!ic->i_send_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent send failed\n"); + rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, - ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - &ic->i_recv_hdrs_dma, GFP_KERNEL); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); if (!ic->i_recv_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent recv failed\n"); + rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), - &ic->i_ack_dma, GFP_KERNEL); + ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, + &ic->i_ack_dma); if (!ic->i_ack) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent ack failed\n"); + rdsdebug("DMA ack header alloc failed\n"); goto recv_hdrs_dma_out; } @@ -596,17 +657,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn) sends_out: vfree(ic->i_sends); + ack_dma_out: - ib_dma_free_coherent(dev, sizeof(struct rds_header), - ic->i_ack, ic->i_ack_dma); + dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + ic->i_ack = NULL; + recv_hdrs_dma_out: - ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - ic->i_recv_hdrs, ic->i_recv_hdrs_dma); + rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = NULL; + ic->i_recv_hdrs_dma = NULL; + send_hdrs_dma_out: - ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * - sizeof(struct rds_header), - ic->i_send_hdrs, ic->i_send_hdrs_dma); + rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); + ic->i_send_hdrs = NULL; + ic->i_send_hdrs_dma = NULL; + qp_out: rdma_destroy_qp(ic->i_cm_id); recv_cq_out: @@ -984,8 +1051,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_cm_id ? ic->i_cm_id->qp : NULL); if (ic->i_cm_id) { - struct ib_device *dev = ic->i_cm_id->device; - rdsdebug("disconnecting cm %p\n", ic->i_cm_id); err = rdma_disconnect(ic->i_cm_id); if (err) { @@ -1035,24 +1100,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ib_destroy_cq(ic->i_recv_cq); } - /* then free the resources that ib callbacks use */ - if (ic->i_send_hdrs) - ib_dma_free_coherent(dev, - ic->i_send_ring.w_nr * - sizeof(struct rds_header), - ic->i_send_hdrs, - ic->i_send_hdrs_dma); - - if (ic->i_recv_hdrs) - ib_dma_free_coherent(dev, - ic->i_recv_ring.w_nr * - sizeof(struct rds_header), - ic->i_recv_hdrs, - ic->i_recv_hdrs_dma); - - if (ic->i_ack) - ib_dma_free_coherent(dev, sizeof(struct rds_header), - ic->i_ack, ic->i_ack_dma); + if (ic->rds_ibdev) { + struct dma_pool *pool; + + pool = ic->rds_ibdev->rid_hdrs_pool; + + /* then free the resources that ib callbacks use */ + if (ic->i_send_hdrs) { + rds_dma_hdrs_free(pool, ic->i_send_hdrs, + ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr); + ic->i_send_hdrs = NULL; + ic->i_send_hdrs_dma = NULL; + } + + if (ic->i_recv_hdrs) { + rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = NULL; + ic->i_recv_hdrs_dma = NULL; + } + + if (ic->i_ack) { + dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + ic->i_ack = NULL; + } + } else { + WARN_ON(ic->i_send_hdrs); + WARN_ON(ic->i_send_hdrs_dma); + WARN_ON(ic->i_recv_hdrs); + WARN_ON(ic->i_recv_hdrs_dma); + WARN_ON(ic->i_ack); + } if (ic->i_sends) rds_ib_send_clear_ring(ic); @@ -1071,9 +1151,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_pd = NULL; ic->i_send_cq = NULL; ic->i_recv_cq = NULL; - ic->i_send_hdrs = NULL; - ic->i_recv_hdrs = NULL; - ic->i_ack = NULL; } BUG_ON(ic->rds_ibdev); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index a0f99bbf362c..694d411dc72f 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) recv->r_wr.num_sge = RDS_IB_RECV_SGE; sge = &recv->r_sge[0]; - sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); + sge->addr = ic->i_recv_hdrs_dma[i]; sge->length = sizeof(struct rds_header); sge->lkey = ic->i_pd->local_dma_lkey; @@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, WARN_ON(ret != 1); sge = &recv->r_sge[0]; - sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); + sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; @@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, } data_len -= sizeof(struct rds_header); - ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; + ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { @@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, wc->status, - ib_wc_status_msg(wc->status)); + ib_wc_status_msg(wc->status), + wc->vendor_err); } /* rds_ib_process_recv() doesn't always consume the frag, and diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe6237dafe2..d1cc1d7778d8 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) send->s_wr.ex.imm_data = 0; sge = &send->s_sge[0]; - sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); + sge->addr = ic->i_send_hdrs_dma[i]; + sge->length = sizeof(struct rds_header); sge->lkey = ic->i_pd->local_dma_lkey; @@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, wc->status, - ib_wc_status_msg(wc->status)); + ib_wc_status_msg(wc->status), wc->vendor_err); } } @@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_queued = jiffies; send->s_op = NULL; - send->s_sge[0].addr = ic->i_send_hdrs_dma - + (pos * sizeof(struct rds_header)); + send->s_sge[0].addr = ic->i_send_hdrs_dma[pos]; + send->s_sge[0].length = sizeof(struct rds_header); - memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); + memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, + sizeof(struct rds_header)); + /* Set up the data, if present */ if (i < work_alloc @@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, &send->s_wr, send->s_wr.num_sge, send->s_wr.next); if (ic->i_flowctl && adv_credits) { - struct rds_header *hdr = &ic->i_send_hdrs[pos]; + struct rds_header *hdr = ic->i_send_hdrs[pos]; /* add credit and redo the header checksum */ hdr->h_credit = adv_credits; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 64830d8c1fdb..452163eadb98 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -209,6 +209,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, */ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) { + const void *here = __builtin_return_address(0); struct rxrpc_peer *peer; _enter(""); @@ -230,6 +231,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer->cong_cwnd = 3; else peer->cong_cwnd = 4; + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here); } _leave(" = %p", peer); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 98dd87ce1510..b1c7e726ce5d 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -530,8 +530,7 @@ begin: fq_flow_set_throttled(q, f); goto begin; } - if (time_next_packet && - (s64)(now - time_next_packet - q->ce_threshold) > 0) { + if ((s64)(now - time_next_packet - q->ce_threshold) > 0) { INET_ECN_set_ce(skb); q->stat_ce_mark++; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 17bd8f539bc7..ed5b0e9fd395 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -382,13 +382,8 @@ void __qdisc_run(struct Qdisc *q) int packets; while (qdisc_restart(q, &packets)) { - /* - * Ordered by possible occurrence: Postpone processing if - * 1. we've exceeded packet quota - * 2. another process needs the CPU; - */ quota -= packets; - if (quota <= 0 || need_resched()) { + if (quota <= 0) { __netif_schedule(q); break; } @@ -1217,8 +1212,13 @@ void dev_deactivate_many(struct list_head *head) /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { - while (some_qdisc_is_busy(dev)) - yield(); + while (some_qdisc_is_busy(dev)) { + /* wait_event() would avoid this sleep-loop but would + * require expensive checks in the fast paths of packet + * processing which isn't worth it. + */ + schedule_timeout_uninterruptible(1); + } /* The new qdisc is assigned at this point so we can safely * unwind stale skb lists and qdisc statistics */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d2ffc9a0ba3a..1ba893b85dad 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -429,6 +429,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, changeover = 1 ; asoc->peer.primary_path = transport; + sctp_ulpevent_nofity_peer_addr_change(transport, + SCTP_ADDR_MADE_PRIM, 0); /* Set a default msg_name for events. */ memcpy(&asoc->peer.primary_addr, &transport->ipaddr, @@ -569,6 +571,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, asoc->peer.transport_count--; + sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0); sctp_transport_free(peer); } @@ -707,6 +710,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; + sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0); + /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { sctp_assoc_set_primary(asoc, peer); @@ -781,10 +786,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, enum sctp_transport_cmd command, sctp_sn_error_t error) { - struct sctp_ulpevent *event; - struct sockaddr_storage addr; - int spc_state = 0; bool ulp_notify = true; + int spc_state = 0; /* Record the transition on the transport. */ switch (command) { @@ -836,16 +839,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification * to the user. */ - if (ulp_notify) { - memset(&addr, 0, sizeof(struct sockaddr_storage)); - memcpy(&addr, &transport->ipaddr, - transport->af_specific->sockaddr_len); - - event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, - 0, spc_state, error, GFP_ATOMIC); - if (event) - asoc->stream.si->enqueue_event(&asoc->ulpq, event); - } + if (ulp_notify) + sctp_ulpevent_nofity_peer_addr_change(transport, + spc_state, error); /* Select new active and retran paths. */ sctp_select_active_and_retran_path(asoc); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index cc0405c79dfc..cc3ce5d80b08 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -75,41 +75,39 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_ulpevent *ev; - int error = 0, notify; - - /* If we failed, we may need to notify. */ - notify = msg->send_failed ? -1 : 0; + int error, sent; /* Release all references. */ list_for_each_safe(pos, temp, &msg->chunks) { list_del_init(pos); chunk = list_entry(pos, struct sctp_chunk, frag_list); - /* Check whether we _really_ need to notify. */ - if (notify < 0) { - asoc = chunk->asoc; - if (msg->send_error) - error = msg->send_error; - else - error = asoc->outqueue.error; - - notify = sctp_ulpevent_type_enabled(asoc->subscribe, - SCTP_SEND_FAILED); + + if (!msg->send_failed) { + sctp_chunk_put(chunk); + continue; } - /* Generate a SEND FAILED event only if enabled. */ - if (notify > 0) { - int sent; - if (chunk->has_tsn) - sent = SCTP_DATA_SENT; - else - sent = SCTP_DATA_UNSENT; + asoc = chunk->asoc; + error = msg->send_error ?: asoc->outqueue.error; + sent = chunk->has_tsn ? SCTP_DATA_SENT : SCTP_DATA_UNSENT; + if (sctp_ulpevent_type_enabled(asoc->subscribe, + SCTP_SEND_FAILED)) { ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent, error, GFP_ATOMIC); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } + if (sctp_ulpevent_type_enabled(asoc->subscribe, + SCTP_SEND_FAILED_EVENT)) { + ev = sctp_ulpevent_make_send_failed_event(asoc, chunk, + sent, error, + GFP_ATOMIC); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_chunk_put(chunk); } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e0cc1edf49a0..c82dbdcf13f2 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -238,7 +238,7 @@ fail: * When a destination address on a multi-homed peer encounters a change * an interface details event is sent. */ -struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( +static struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, const struct sockaddr_storage *aaddr, int flags, int state, int error, gfp_t gfp) @@ -336,6 +336,22 @@ fail: return NULL; } +void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, + int state, int error) +{ + struct sctp_association *asoc = transport->asoc; + struct sockaddr_storage addr; + struct sctp_ulpevent *event; + + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, state, + error, GFP_ATOMIC); + if (event) + asoc->stream.si->enqueue_event(&asoc->ulpq, event); +} + /* Create and initialize an SCTP_REMOTE_ERROR notification. * * Note: This assumes that the chunk->skb->data already points to the @@ -511,6 +527,45 @@ fail: return NULL; } +struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event( + const struct sctp_association *asoc, struct sctp_chunk *chunk, + __u16 flags, __u32 error, gfp_t gfp) +{ + struct sctp_send_failed_event *ssf; + struct sctp_ulpevent *event; + struct sk_buff *skb; + int len; + + skb = skb_copy_expand(chunk->skb, sizeof(*ssf), 0, gfp); + if (!skb) + return NULL; + + len = ntohs(chunk->chunk_hdr->length); + len -= sctp_datachk_len(&asoc->stream); + + skb_pull(skb, sctp_datachk_len(&asoc->stream)); + event = sctp_skb2event(skb); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); + + ssf = skb_push(skb, sizeof(*ssf)); + ssf->ssf_type = SCTP_SEND_FAILED_EVENT; + ssf->ssf_flags = flags; + ssf->ssf_length = sizeof(*ssf) + len; + skb_trim(skb, ssf->ssf_length); + ssf->ssf_error = error; + + ssf->ssfe_info.snd_sid = chunk->sinfo.sinfo_stream; + ssf->ssfe_info.snd_ppid = chunk->sinfo.sinfo_ppid; + ssf->ssfe_info.snd_context = chunk->sinfo.sinfo_context; + ssf->ssfe_info.snd_assoc_id = chunk->sinfo.sinfo_assoc_id; + ssf->ssfe_info.snd_flags = chunk->chunk_hdr->flags; + + sctp_ulpevent_set_owner(event, asoc); + ssf->ssf_assoc_id = sctp_assoc2id(asoc); + + return event; +} + /* Create and initialize a SCTP_SHUTDOWN_EVENT notification. * * Socket Extensions for SCTP - draft-01 diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index fc06720b53c1..1a858e59fc31 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -65,8 +65,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) rc = sk_wait_event(sk, &timeout, !smc_tx_prepared_sends(&smc->conn) || - (sk->sk_err == ECONNABORTED) || - (sk->sk_err == ECONNRESET), + sk->sk_err == ECONNABORTED || + sk->sk_err == ECONNRESET, &wait); if (rc) break; @@ -113,9 +113,6 @@ static void smc_close_active_abort(struct smc_sock *smc) { struct sock *sk = &smc->sk; - struct smc_cdc_conn_state_flags *txflags = - &smc->conn.local_tx_ctrl.conn_state_flags; - if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { @@ -129,35 +126,26 @@ static void smc_close_active_abort(struct smc_sock *smc) release_sock(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); + sk->sk_state = SMC_CLOSED; sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - if (!smc_cdc_rxed_any_close(&smc->conn)) - sk->sk_state = SMC_PEERABORTWAIT; - else - sk->sk_state = SMC_CLOSED; release_sock(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); + sk->sk_state = SMC_CLOSED; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: - if (!txflags->peer_conn_closed) { - /* just SHUTDOWN_SEND done */ - sk->sk_state = SMC_PEERABORTWAIT; - } else { - sk->sk_state = SMC_CLOSED; - } + case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_CLOSED; sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: sk->sk_state = SMC_CLOSED; break; - case SMC_PEERFINCLOSEWAIT: - sock_put(sk); /* passive closing */ - break; case SMC_INIT: case SMC_PEERABORTWAIT: case SMC_CLOSED: @@ -215,8 +203,6 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); - if (rc) - break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -229,8 +215,6 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); - if (rc) - break; } sk->sk_state = SMC_CLOSED; break; @@ -246,8 +230,6 @@ again: goto again; /* confirm close from peer */ rc = smc_close_final(conn); - if (rc) - break; if (smc_cdc_rxed_any_close(conn)) { /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; @@ -263,8 +245,6 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); - if (rc) - break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -272,10 +252,12 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - smc_close_abort(conn); + rc = smc_close_abort(conn); sk->sk_state = SMC_CLOSED; break; case SMC_PEERABORTWAIT: + sk->sk_state = SMC_CLOSED; + break; case SMC_CLOSED: /* nothing to do, add tracing in future patch */ break; @@ -451,8 +433,6 @@ again: goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (rc) - break; sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: @@ -466,8 +446,6 @@ again: goto again; /* confirm close from peer */ rc = smc_close_wr(conn); - if (rc) - break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 88556f0251ab..de9bf035f545 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -42,6 +42,19 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); +/* return head of link group list and its lock for a given link group */ +static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, + spinlock_t **lgr_lock) +{ + if (lgr->is_smcd) { + *lgr_lock = &lgr->smcd->lgr_lock; + return &lgr->smcd->lgr_list; + } + + *lgr_lock = &smc_lgr_list.lock; + return &smc_lgr_list.list; +} + static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { /* client link group creation always follows the server link group @@ -157,19 +170,21 @@ static void smc_lgr_free_work(struct work_struct *work) struct smc_link_group *lgr = container_of(to_delayed_work(work), struct smc_link_group, free_work); + spinlock_t *lgr_lock; bool conns; - spin_lock_bh(&smc_lgr_list.lock); + smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); if (!conns) { /* number of lgr connections is no longer zero */ - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); return; } if (!list_empty(&lgr->list)) list_del_init(&lgr->list); /* remove from smc_lgr_list */ - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); if (!lgr->is_smcd && !lgr->terminating) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; @@ -198,7 +213,9 @@ static void smc_lgr_free_work(struct work_struct *work) static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_link_group *lgr; + struct list_head *lgr_list; struct smc_link *lnk; + spinlock_t *lgr_lock; u8 rndvec[3]; int rc = 0; int i; @@ -231,10 +248,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->conns_all = RB_ROOT; if (ini->is_smcd) { /* SMC-D specific settings */ + get_device(&ini->ism_dev->dev); lgr->peer_gid = ini->ism_gid; lgr->smcd = ini->ism_dev; + lgr_list = &ini->ism_dev->lgr_list; + lgr_lock = &lgr->smcd->lgr_lock; } else { /* SMC-R specific settings */ + get_device(&ini->ib_dev->ibdev->dev); lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); @@ -245,6 +266,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lnk->link_id = SMC_SINGLE_LINK; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; + lgr_list = &smc_lgr_list.list; + lgr_lock = &smc_lgr_list.lock; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; if (!ini->ib_dev->initialized) @@ -274,9 +297,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) goto destroy_qp; } smc->conn.lgr = lgr; - spin_lock_bh(&smc_lgr_list.lock); - list_add(&lgr->list, &smc_lgr_list.list); - spin_unlock_bh(&smc_lgr_list.lock); + spin_lock_bh(lgr_lock); + list_add(&lgr->list, lgr_list); + spin_unlock_bh(lgr_lock); return 0; destroy_qp: @@ -433,20 +456,27 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) static void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); - if (lgr->is_smcd) + if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); - else + put_device(&lgr->smcd->dev); + } else { smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); + put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); + } kfree(lgr); } void smc_lgr_forget(struct smc_link_group *lgr) { - spin_lock_bh(&smc_lgr_list.lock); + struct list_head *lgr_list; + spinlock_t *lgr_lock; + + lgr_list = smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); + if (!list_empty(lgr_list)) + list_del_init(lgr_list); + spin_unlock_bh(lgr_lock); } /* terminate linkgroup abnormally */ @@ -487,9 +517,12 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) void smc_lgr_terminate(struct smc_link_group *lgr) { - spin_lock_bh(&smc_lgr_list.lock); + spinlock_t *lgr_lock; + + smc_lgr_list_head(lgr, &lgr_lock); + spin_lock_bh(lgr_lock); __smc_lgr_terminate(lgr); - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); } /* Called when IB port is terminated */ @@ -514,16 +547,15 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) LIST_HEAD(lgr_free_list); /* run common cleanup function and build free list */ - spin_lock_bh(&smc_lgr_list.lock); - list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->is_smcd && lgr->smcd == dev && - (!peer_gid || lgr->peer_gid == peer_gid) && + spin_lock_bh(&dev->lgr_lock); + list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { + if ((!peer_gid || lgr->peer_gid == peer_gid) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { __smc_lgr_terminate(lgr); list_move(&lgr->list, &lgr_free_list); } } - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(&dev->lgr_lock); /* cancel the regular free workers and actually free lgrs */ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { @@ -607,10 +639,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; + struct list_head *lgr_list; struct smc_link_group *lgr; enum smc_lgr_role role; + spinlock_t *lgr_lock; int rc = 0; + lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; + lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock; ini->cln_first_contact = SMC_FIRST_CONTACT; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; if (role == SMC_CLNT && ini->srv_first_contact) @@ -618,8 +654,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) goto create; /* determine if an existing link group can be reused */ - spin_lock_bh(&smc_lgr_list.lock); - list_for_each_entry(lgr, &smc_lgr_list.list, list) { + spin_lock_bh(lgr_lock); + list_for_each_entry(lgr, lgr_list, list) { write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : @@ -639,7 +675,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) } write_unlock_bh(&lgr->conns_lock); } - spin_unlock_bh(&smc_lgr_list.lock); + spin_unlock_bh(lgr_lock); if (role == SMC_CLNT && !ini->srv_first_contact && ini->cln_first_contact == SMC_FIRST_CONTACT) { @@ -1027,16 +1063,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, return 0; } +static void smc_core_going_away(void) +{ + struct smc_ib_device *smcibdev; + struct smcd_dev *smcd; + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { + int i; + + for (i = 0; i < SMC_MAX_PORTS; i++) + set_bit(i, smcibdev->ports_going_away); + } + spin_unlock(&smc_ib_devices.lock); + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + smcd->going_away = 1; + } + spin_unlock(&smcd_dev_list.lock); +} + /* Called (from smc_exit) when module is removed */ void smc_core_exit(void) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_freeing_list); + struct smcd_dev *smcd; + + smc_core_going_away(); spin_lock_bh(&smc_lgr_list.lock); - if (!list_empty(&smc_lgr_list.list)) - list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); + list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); spin_unlock_bh(&smc_lgr_list.lock); + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd, &smcd_dev_list.list, list) + list_splice_init(&smcd->lgr_list, &lgr_freeing_list); + spin_unlock(&smcd_dev_list.lock); + list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); if (!lgr->is_smcd) { diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d14ca4af6f94..af05daeb0538 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work) for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); - if (!smc_ib_port_active(smcibdev, port_idx + 1)) + if (!smc_ib_port_active(smcibdev, port_idx + 1)) { + set_bit(port_idx, smcibdev->ports_going_away); smc_port_terminate(smcibdev, port_idx + 1); + } else { + clear_bit(port_idx, smcibdev->ports_going_away); + } } } @@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: /* terminate all ports on device */ - for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { set_bit(port_idx, &smcibdev->port_event_mask); + set_bit(port_idx, smcibdev->ports_going_away); + } schedule_work(&smcibdev->port_event_work); break; case IB_EVENT_PORT_ERR: @@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, port_idx = ibevent->element.port_num - 1; if (port_idx < SMC_MAX_PORTS) { set_bit(port_idx, &smcibdev->port_event_mask); + if (ibevent->event == IB_EVENT_PORT_ERR) + set_bit(port_idx, smcibdev->ports_going_away); + else if (ibevent->event == IB_EVENT_PORT_ACTIVE) + clear_bit(port_idx, smcibdev->ports_going_away); schedule_work(&smcibdev->port_event_work); } break; @@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) port_idx = ibevent->element.qp->port - 1; if (port_idx < SMC_MAX_PORTS) { set_bit(port_idx, &smcibdev->port_event_mask); + set_bit(port_idx, smcibdev->ports_going_away); schedule_work(&smcibdev->port_event_work); } break; diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index da60ab9e8d70..6a0069db6cae 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -47,6 +47,7 @@ struct smc_ib_device { /* ib-device infos for smc */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; + DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); }; struct smc_buf_desc; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index e89e918b88e0..ee7340898cb4 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); spin_lock_init(&smcd->lock); + spin_lock_init(&smcd->lgr_lock); INIT_LIST_HEAD(&smcd->vlan); + INIT_LIST_HEAD(&smcd->lgr_list); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); if (!smcd->event_wq) { @@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd) spin_lock(&smcd_dev_list.lock); list_del(&smcd->list); spin_unlock(&smcd_dev_list.lock); + smcd->going_away = 1; flush_workqueue(smcd->event_wq); destroy_workqueue(smcd->event_wq); smc_smcd_terminate(smcd, 0, VLAN_VID_MASK); @@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) { struct smc_ism_event_work *wrk; + if (smcd->going_away) + return; /* copy event to event work queue, and let it be handled there */ wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); if (!wrk) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index bab2da8cf17a..6b7799b3f5ca 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -781,6 +781,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && + !test_bit(i - 1, ibdev->ports_going_away) && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL)) { ini->ib_dev = ibdev; @@ -820,6 +821,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, continue; if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && smc_ib_port_active(ibdev, i) && + !test_bit(i - 1, ibdev->ports_going_away) && !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL)) { ini->ib_dev = ibdev; @@ -846,7 +848,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, spin_lock(&smcd_dev_list.lock); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { - if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { + if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && + !ismdev->going_away) { ini->ism_dev = ismdev; break; } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index d6165ad384c0..d32bbd0f5e46 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -176,7 +176,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_PUBL_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_publ_dump, }, { @@ -239,7 +240,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_MON_PEER_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_node_dump_monitor_peer, }, { @@ -250,7 +252,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_udp_nl_dump_remoteip, }, #endif @@ -268,18 +271,6 @@ struct genl_family tipc_genl_family __ro_after_init = { .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), }; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) -{ - u32 maxattr = tipc_genl_family.maxattr; - - *attr = genl_family_attrbuf(&tipc_genl_family); - if (!*attr) - return -EOPNOTSUPP; - - return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr, - tipc_nl_policy, NULL); -} - int __init tipc_netlink_start(void) { int res; diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 4ba0ad422110..7cf777723e3e 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -38,7 +38,6 @@ #include <net/netlink.h> extern struct genl_family tipc_genl_family; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { struct sk_buff *skb; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e135d4e11231..17a529739f8d 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -181,15 +181,18 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) { + struct genl_dumpit_info info; int len = 0; int err; struct sk_buff *buf; struct nlmsghdr *nlmsg; struct netlink_callback cb; + struct nlattr **attrbuf; memset(&cb, 0, sizeof(cb)); cb.nlh = (struct nlmsghdr *)arg->data; cb.skb = arg; + cb.data = &info; buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!buf) @@ -201,19 +204,35 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } + attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto err_out; + } + + info.attrs = attrbuf; + err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, NULL); + if (err) + goto err_out; + do { int rem; len = (*cmd->dumpit)(buf, &cb); nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { - struct nlattr **attrs; - - err = tipc_nlmsg_parse(nlmsg, &attrs); + err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN, + attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, + NULL); if (err) goto err_out; - err = (*cmd->format)(msg, attrs); + err = (*cmd->format)(msg, attrbuf); if (err) goto err_out; @@ -231,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, err = 0; err_out: + kfree(attrbuf); tipc_dump_done(&cb); kfree_skb(buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index c8f6177dd5a2..f2e3cf70c922 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2484,13 +2484,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, int err; if (!prev_node) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_MON]) return -EINVAL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f8bbc4aab213..35e32ffc2b90 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3588,13 +3588,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_sock *tsk; if (!tsk_portid) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 287df68721df..43ca5fd6574d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -448,15 +448,11 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) int i; if (!bid && !skip_cnt) { + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct net *net = sock_net(skb->sk); struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; - struct nlattr **attrs; char *bname; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tls/Kconfig b/net/tls/Kconfig index e4328b3b72eb..61ec78521a60 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -26,3 +26,13 @@ config TLS_DEVICE Enable kernel support for HW offload of the TLS protocol. If unsure, say N. + +config TLS_TOE + bool "Transport Layer Security TCP stack bypass" + depends on TLS + default n + help + Enable kernel support for legacy HW offload of the TLS protocol, + which is incompatible with the Linux networking stack semantics. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile index ef0dc74ce8f9..f1ffbfe8968d 100644 --- a/net/tls/Makefile +++ b/net/tls/Makefile @@ -3,8 +3,11 @@ # Makefile for the TLS subsystem. # +CFLAGS_trace.o := -I$(src) + obj-$(CONFIG_TLS) += tls.o -tls-y := tls_main.o tls_sw.o +tls-y := tls_main.o tls_sw.o tls_proc.o trace.o +tls-$(CONFIG_TLS_TOE) += tls_toe.o tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f959487c5cd1..33b267b052c0 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -38,6 +38,8 @@ #include <net/tcp.h> #include <net/tls.h> +#include "trace.h" + /* device_offload_lock is used to synchronize tls_dev_add * against NETDEV_DOWN notifications. */ @@ -202,6 +204,15 @@ void tls_device_free_resources_tx(struct sock *sk) tls_free_partial_record(sk, tls_ctx); } +void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + trace_tls_device_tx_resync_req(sk, got_seq, exp_seq); + WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); +} +EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request); + static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { @@ -216,6 +227,7 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, rcd_sn = tls_ctx->tx.rec_seq; + trace_tls_device_tx_resync_send(sk, seq, rcd_sn); down_read(&device_offload_lock); netdev = tls_ctx->netdev; if (netdev) @@ -419,7 +431,7 @@ static int tls_push_data(struct sock *sk, ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) return -ENOTSUPP; - if (sk->sk_err) + if (unlikely(sk->sk_err)) return -sk->sk_err; flags |= MSG_SENDPAGE_DECRYPTED; @@ -440,9 +452,8 @@ static int tls_push_data(struct sock *sk, max_open_record_len = TLS_MAX_PAYLOAD_SIZE + prot->prepend_size; do { - rc = tls_do_allocation(sk, ctx, pfrag, - prot->prepend_size); - if (rc) { + rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); + if (unlikely(rc)) { rc = sk_stream_wait_memory(sk, &timeo); if (!rc) continue; @@ -637,15 +648,19 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) static void tls_device_resync_rx(struct tls_context *tls_ctx, struct sock *sk, u32 seq, u8 *rcd_sn) { + struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); struct net_device *netdev; if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) return; + + trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); netdev = READ_ONCE(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) @@ -653,8 +668,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; + u32 sock_data, is_req_pending; struct tls_prot_info *prot; - u32 is_req_pending; s64 resync_req; u32 req_seq; @@ -683,8 +698,12 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) /* head of next rec is already in, note that the sock_inq will * include the currently parsed message when called from parser */ - if (tcp_inq(sk) > rcd_len) + sock_data = tcp_inq(sk); + if (sock_data > rcd_len) { + trace_tls_device_rx_resync_nh_delay(sk, sock_data, + rcd_len); return; + } rx_ctx->resync_nh_do_now = 0; seq += rcd_len; @@ -728,6 +747,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx, /* head of next rec is already in, parser will sync for us */ if (tcp_inq(sk) > rxm->full_len) { + trace_tls_device_rx_resync_nh_schedule(sk); ctx->resync_nh_do_now = 1; } else { struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -826,9 +846,9 @@ free_buf: return err; } -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) +int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) { - struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); int is_decrypted = skb->decrypted; int is_encrypted = !is_decrypted; @@ -840,6 +860,10 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) is_encrypted &= !skb_iter->decrypted; } + trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len, + tls_ctx->rx.rec_seq, rxm->full_len, + is_encrypted, is_decrypted); + ctx->sw.decrypted |= is_decrypted; /* Return immediately if the record is either entirely plaintext or @@ -1013,6 +1037,8 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX, &ctx->crypto_send.info, tcp_sk(sk)->write_seq); + trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX, + tcp_sk(sk)->write_seq, rec_seq, rc); if (rc) goto release_lock; @@ -1049,6 +1075,7 @@ free_marker_record: int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) { + struct tls12_crypto_info_aes_gcm_128 *info; struct tls_offload_context_rx *context; struct net_device *netdev; int rc = 0; @@ -1096,6 +1123,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, tcp_sk(sk)->copied_seq); + info = (void *)&ctx->crypto_recv.info; + trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX, + tcp_sk(sk)->copied_seq, info->rec_seq, rc); if (rc) goto free_sw_resources; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ac88877dcade..f144b965704e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -41,7 +41,9 @@ #include <linux/inetdevice.h> #include <linux/inet_diag.h> +#include <net/snmp.h> #include <net/tls.h> +#include <net/tls_toe.h> MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); @@ -58,14 +60,12 @@ static struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static struct proto *saved_tcpv4_prot; static DEFINE_MUTEX(tcpv4_prot_mutex); -static LIST_HEAD(device_list); -static DEFINE_SPINLOCK(device_spinlock); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], struct proto *base); -static void update_sk_prot(struct sock *sk, struct tls_context *ctx) +void update_sk_prot(struct sock *sk, struct tls_context *ctx) { int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; @@ -286,14 +286,19 @@ static void tls_sk_proto_cleanup(struct sock *sk, kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } - if (ctx->rx_conf == TLS_SW) + if (ctx->rx_conf == TLS_SW) { tls_sw_release_resources_rx(sk); - else if (ctx->rx_conf == TLS_HW) + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } else if (ctx->rx_conf == TLS_HW) { tls_device_offload_cleanup_rx(sk); + TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); + } } static void tls_sk_proto_close(struct sock *sk, long timeout) @@ -534,19 +539,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, if (tx) { rc = tls_set_device_offload(sk, ctx); conf = TLS_HW; - if (rc) { + if (!rc) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); + } else { rc = tls_set_sw_offload(sk, ctx, 1); if (rc) goto err_crypto_info; + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); conf = TLS_SW; } } else { rc = tls_set_device_offload_rx(sk, ctx); conf = TLS_HW; - if (rc) { + if (!rc) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); + } else { rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); conf = TLS_SW; } tls_sw_strparser_arm(sk, ctx); @@ -603,7 +618,7 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } -static struct tls_context *create_ctx(struct sock *sk) +struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; @@ -643,90 +658,6 @@ static void tls_build_proto(struct sock *sk) } } -static void tls_hw_sk_destruct(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - - ctx->sk_destruct(sk); - /* Free ctx */ - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tls_ctx_free(sk, ctx); -} - -static int tls_hw_prot(struct sock *sk) -{ - struct tls_context *ctx; - struct tls_device *dev; - int rc = 0; - - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->feature && dev->feature(dev)) { - ctx = create_ctx(sk); - if (!ctx) - goto out; - - spin_unlock_bh(&device_spinlock); - tls_build_proto(sk); - ctx->sk_destruct = sk->sk_destruct; - sk->sk_destruct = tls_hw_sk_destruct; - ctx->rx_conf = TLS_HW_RECORD; - ctx->tx_conf = TLS_HW_RECORD; - update_sk_prot(sk, ctx); - spin_lock_bh(&device_spinlock); - rc = 1; - break; - } - } -out: - spin_unlock_bh(&device_spinlock); - return rc; -} - -static void tls_hw_unhash(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct tls_device *dev; - - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->unhash) { - kref_get(&dev->kref); - spin_unlock_bh(&device_spinlock); - dev->unhash(dev, sk); - kref_put(&dev->kref, dev->release); - spin_lock_bh(&device_spinlock); - } - } - spin_unlock_bh(&device_spinlock); - ctx->sk_proto->unhash(sk); -} - -static int tls_hw_hash(struct sock *sk) -{ - struct tls_context *ctx = tls_get_ctx(sk); - struct tls_device *dev; - int err; - - err = ctx->sk_proto->hash(sk); - spin_lock_bh(&device_spinlock); - list_for_each_entry(dev, &device_list, dev_list) { - if (dev->hash) { - kref_get(&dev->kref); - spin_unlock_bh(&device_spinlock); - err |= dev->hash(dev, sk); - kref_put(&dev->kref, dev->release); - spin_lock_bh(&device_spinlock); - } - } - spin_unlock_bh(&device_spinlock); - - if (err) - tls_hw_unhash(sk); - return err; -} - static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], struct proto *base) { @@ -764,10 +695,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif - +#ifdef CONFIG_TLS_TOE prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base; - prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash; - prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_toe_hash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_toe_unhash; +#endif } static int tls_init(struct sock *sk) @@ -775,8 +707,12 @@ static int tls_init(struct sock *sk) struct tls_context *ctx; int rc = 0; - if (tls_hw_prot(sk)) + tls_build_proto(sk); + +#ifdef CONFIG_TLS_TOE + if (tls_toe_bypass(sk)) return 0; +#endif /* The TLS ulp is currently supported only for TCP sockets * in ESTABLISHED state. @@ -787,11 +723,9 @@ static int tls_init(struct sock *sk) if (sk->sk_state != TCP_ESTABLISHED) return -ENOTSUPP; - tls_build_proto(sk); - /* allocate tls context */ write_lock_bh(&sk->sk_callback_lock); - ctx = create_ctx(sk); + ctx = tls_ctx_create(sk); if (!ctx) { rc = -ENOMEM; goto out; @@ -877,21 +811,34 @@ static size_t tls_get_info_size(const struct sock *sk) return size; } -void tls_register_device(struct tls_device *device) +static int __net_init tls_init_net(struct net *net) { - spin_lock_bh(&device_spinlock); - list_add_tail(&device->dev_list, &device_list); - spin_unlock_bh(&device_spinlock); + int err; + + net->mib.tls_statistics = alloc_percpu(struct linux_tls_mib); + if (!net->mib.tls_statistics) + return -ENOMEM; + + err = tls_proc_init(net); + if (err) + goto err_free_stats; + + return 0; +err_free_stats: + free_percpu(net->mib.tls_statistics); + return err; } -EXPORT_SYMBOL(tls_register_device); -void tls_unregister_device(struct tls_device *device) +static void __net_exit tls_exit_net(struct net *net) { - spin_lock_bh(&device_spinlock); - list_del(&device->dev_list); - spin_unlock_bh(&device_spinlock); + tls_proc_fini(net); + free_percpu(net->mib.tls_statistics); } -EXPORT_SYMBOL(tls_unregister_device); + +static struct pernet_operations tls_proc_ops = { + .init = tls_init_net, + .exit = tls_exit_net, +}; static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", @@ -904,6 +851,12 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static int __init tls_register(void) { + int err; + + err = register_pernet_subsys(&tls_proc_ops); + if (err) + return err; + tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; @@ -917,6 +870,7 @@ static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); tls_device_cleanup(); + unregister_pernet_subsys(&tls_proc_ops); } module_init(tls_register); diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c new file mode 100644 index 000000000000..83d9c80a684e --- /dev/null +++ b/net/tls/tls_proc.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/snmp.h> +#include <net/tls.h> + +static const struct snmp_mib tls_mib_list[] = { + SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW), + SNMP_MIB_ITEM("TlsCurrRxSw", LINUX_MIB_TLSCURRRXSW), + SNMP_MIB_ITEM("TlsCurrTxDevice", LINUX_MIB_TLSCURRTXDEVICE), + SNMP_MIB_ITEM("TlsCurrRxDevice", LINUX_MIB_TLSCURRRXDEVICE), + SNMP_MIB_ITEM("TlsTxSw", LINUX_MIB_TLSTXSW), + SNMP_MIB_ITEM("TlsRxSw", LINUX_MIB_TLSRXSW), + SNMP_MIB_ITEM("TlsTxDevice", LINUX_MIB_TLSTXDEVICE), + SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE), + SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR), + SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), + SNMP_MIB_SENTINEL +}; + +static int tls_statistics_seq_show(struct seq_file *seq, void *v) +{ + unsigned long buf[LINUX_MIB_TLSMAX] = {}; + struct net *net = seq->private; + int i; + + snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics); + for (i = 0; tls_mib_list[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]); + + return 0; +} + +int __net_init tls_proc_init(struct net *net) +{ + if (!proc_create_net_single("tls_stat", 0444, net->proc_net, + tls_statistics_seq_show, NULL)) + return -ENOMEM; + return 0; +} + +void __net_exit tls_proc_fini(struct net *net) +{ + remove_proc_entry("tls_stat", net->proc_net); +} diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c2b5e0d2ba1a..de7561d4cfa5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -168,6 +168,9 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) /* Propagate if there was an err */ if (err) { + if (err == -EBADMSG) + TLS_INC_STATS(sock_net(skb->sk), + LINUX_MIB_TLSDECRYPTERROR); ctx->async_wait.err = err; tls_err_abort(skb->sk, err); } else { @@ -253,6 +256,8 @@ static int tls_do_decryption(struct sock *sk, return ret; ret = crypto_wait_req(ret, &ctx->async_wait); + } else if (ret == -EBADMSG) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); } if (async) @@ -1490,7 +1495,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (!ctx->decrypted) { if (tls_ctx->rx_conf == TLS_HW) { - err = tls_device_decrypted(sk, skb); + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); if (err < 0) return err; } @@ -1518,7 +1523,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - ctx->decrypted = true; + ctx->decrypted = 1; ctx->saved_data_ready(sk); } else { *zc = false; @@ -1928,7 +1933,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, tls_err_abort(sk, EBADMSG); goto splice_read_end; } - ctx->decrypted = true; + ctx->decrypted = 1; } rxm = strp_msg(skb); @@ -2029,7 +2034,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - ctx->decrypted = false; + ctx->decrypted = 0; ctx->recv_pkt = skb; strp_pause(strp); @@ -2386,10 +2391,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); if (crypto_info->version == TLS_1_3_VERSION) - sw_ctx_rx->async_capable = false; + sw_ctx_rx->async_capable = 0; else sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + !!(tfm->__crt_alg->cra_flags & + CRYPTO_ALG_ASYNC); /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c new file mode 100644 index 000000000000..7e1330f19165 --- /dev/null +++ b/net/tls/tls_toe.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <net/inet_connection_sock.h> +#include <net/tls.h> +#include <net/tls_toe.h> + +static LIST_HEAD(device_list); +static DEFINE_SPINLOCK(device_spinlock); + +static void tls_toe_sk_destruct(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tls_context *ctx = tls_get_ctx(sk); + + ctx->sk_destruct(sk); + /* Free ctx */ + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tls_ctx_free(sk, ctx); +} + +int tls_toe_bypass(struct sock *sk) +{ + struct tls_toe_device *dev; + struct tls_context *ctx; + int rc = 0; + + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->feature && dev->feature(dev)) { + ctx = tls_ctx_create(sk); + if (!ctx) + goto out; + + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_toe_sk_destruct; + ctx->rx_conf = TLS_HW_RECORD; + ctx->tx_conf = TLS_HW_RECORD; + update_sk_prot(sk, ctx); + rc = 1; + break; + } + } +out: + spin_unlock_bh(&device_spinlock); + return rc; +} + +void tls_toe_unhash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_toe_device *dev; + + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->unhash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); + dev->unhash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } + } + spin_unlock_bh(&device_spinlock); + ctx->sk_proto->unhash(sk); +} + +int tls_toe_hash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_toe_device *dev; + int err; + + err = ctx->sk_proto->hash(sk); + spin_lock_bh(&device_spinlock); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->hash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); + err |= dev->hash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } + } + spin_unlock_bh(&device_spinlock); + + if (err) + tls_toe_unhash(sk); + return err; +} + +void tls_toe_register_device(struct tls_toe_device *device) +{ + spin_lock_bh(&device_spinlock); + list_add_tail(&device->dev_list, &device_list); + spin_unlock_bh(&device_spinlock); +} +EXPORT_SYMBOL(tls_toe_register_device); + +void tls_toe_unregister_device(struct tls_toe_device *device) +{ + spin_lock_bh(&device_spinlock); + list_del(&device->dev_list); + spin_unlock_bh(&device_spinlock); +} +EXPORT_SYMBOL(tls_toe_unregister_device); diff --git a/net/tls/trace.c b/net/tls/trace.c new file mode 100644 index 000000000000..e374913cf9c9 --- /dev/null +++ b/net/tls/trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/module.h> + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" + +#endif diff --git a/net/tls/trace.h b/net/tls/trace.h new file mode 100644 index 000000000000..9ba5f600ea43 --- /dev/null +++ b/net/tls/trace.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tls + +#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _TLS_TRACE_H_ + +#include <asm/unaligned.h> +#include <linux/tracepoint.h> + +struct sock; + +TRACE_EVENT(tls_device_offload_set, + + TP_PROTO(struct sock *sk, int dir, u32 tcp_seq, u8 *rec_no, int ret), + + TP_ARGS(sk, dir, tcp_seq, rec_no, ret), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( int, dir ) + __field( u32, tcp_seq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->dir = dir; + __entry->tcp_seq = tcp_seq; + __entry->ret = ret; + ), + + TP_printk( + "sk=%p direction=%d tcp_seq=%u rec_no=%llu ret=%d", + __entry->sk, __entry->dir, __entry->tcp_seq, __entry->rec_no, + __entry->ret + ) +); + +TRACE_EVENT(tls_device_decrypted, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, u32 rec_len, + bool encrypted, bool decrypted), + + TP_ARGS(sk, tcp_seq, rec_no, rec_len, encrypted, decrypted), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + __field( u32, rec_len ) + __field( bool, encrypted ) + __field( bool, decrypted ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + __entry->rec_len = rec_len; + __entry->encrypted = encrypted; + __entry->decrypted = decrypted; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu len=%u encrypted=%d decrypted=%d", + __entry->sk, __entry->tcp_seq, + __entry->rec_no, __entry->rec_len, + __entry->encrypted, __entry->decrypted + ) +); + +TRACE_EVENT(tls_device_rx_resync_send, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, int sync_type), + + TP_ARGS(sk, tcp_seq, rec_no, sync_type), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + __field( int, sync_type ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + __entry->sync_type = sync_type; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu sync_type=%d", + __entry->sk, __entry->tcp_seq, __entry->rec_no, + __entry->sync_type + ) +); + +TRACE_EVENT(tls_device_rx_resync_nh_schedule, + + TP_PROTO(struct sock *sk), + + TP_ARGS(sk), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + ), + + TP_fast_assign( + __entry->sk = sk; + ), + + TP_printk( + "sk=%p", __entry->sk + ) +); + +TRACE_EVENT(tls_device_rx_resync_nh_delay, + + TP_PROTO(struct sock *sk, u32 sock_data, u32 rec_len), + + TP_ARGS(sk, sock_data, rec_len), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u32, sock_data ) + __field( u32, rec_len ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->sock_data = sock_data; + __entry->rec_len = rec_len; + ), + + TP_printk( + "sk=%p sock_data=%u rec_len=%u", + __entry->sk, __entry->sock_data, __entry->rec_len + ) +); + +TRACE_EVENT(tls_device_tx_resync_req, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u32 exp_tcp_seq), + + TP_ARGS(sk, tcp_seq, exp_tcp_seq), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u32, tcp_seq ) + __field( u32, exp_tcp_seq ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->tcp_seq = tcp_seq; + __entry->exp_tcp_seq = exp_tcp_seq; + ), + + TP_printk( + "sk=%p tcp_seq=%u exp_tcp_seq=%u", + __entry->sk, __entry->tcp_seq, __entry->exp_tcp_seq + ) +); + +TRACE_EVENT(tls_device_tx_resync_send, + + TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no), + + TP_ARGS(sk, tcp_seq, rec_no), + + TP_STRUCT__entry( + __field( struct sock *, sk ) + __field( u64, rec_no ) + __field( u32, tcp_seq ) + ), + + TP_fast_assign( + __entry->sk = sk; + __entry->rec_no = get_unaligned_be64(rec_no); + __entry->tcp_seq = tcp_seq; + ), + + TP_printk( + "sk=%p tcp_seq=%u rec_no=%llu", + __entry->sk, __entry->tcp_seq, __entry->rec_no + ) +); + +#endif /* _TLS_TRACE_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +#include <trace/define_trace.h> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 67e87db5877f..c853ad0875f4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -284,11 +284,9 @@ static struct sock *__unix_find_socket_byname(struct net *net, if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) - goto found; + return s; } - s = NULL; -found: - return s; + return NULL; } static inline struct sock *unix_find_socket_byname(struct net *net, diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index c443db7af8d4..bef8772116ec 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -13,15 +13,16 @@ #include <linux/hyperv.h> #include <net/sock.h> #include <net/af_vsock.h> +#include <asm/hyperv-tlfs.h> /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some - * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer - * hosts don't have this limitation; but, keep the defaults the same for compat. + * stricter requirements on the hv_sock ring buffer size of six 4K pages. + * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this + * limitation; but, keep the defaults the same for compat. */ -#define PAGE_SIZE_4K 4096 -#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6) -#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6) -#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64) +#define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6) +#define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6) +#define RINGBUFFER_HVS_MAX_SIZE (HV_HYP_PAGE_SIZE * 64) /* The MTU is 16KB per the host side's design */ #define HVS_MTU_SIZE (1024 * 16) @@ -54,7 +55,8 @@ struct hvs_recv_buf { * ringbuffer APIs that allow us to directly copy data from userspace buffer * to VMBus ringbuffer. */ -#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header)) +#define HVS_SEND_BUF_SIZE \ + (HV_HYP_PAGE_SIZE - sizeof(struct vmpipe_proto_header)) struct hvs_send_buf { /* The header before the payload data */ @@ -393,10 +395,10 @@ static void hvs_open_connection(struct vmbus_channel *chan) } else { sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); - sndbuf = ALIGN(sndbuf, PAGE_SIZE); + sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE); rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); - rcvbuf = ALIGN(rcvbuf, PAGE_SIZE); + rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); } ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb, @@ -670,7 +672,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg, ssize_t ret = 0; ssize_t bytes_written = 0; - BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K); + BUILD_BUG_ON(sizeof(*send_buf) != HV_HYP_PAGE_SIZE); send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL); if (!send_buf) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 481f7f8a1655..d02c9b41a768 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -268,6 +268,55 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, } static ssize_t +virtio_transport_stream_do_peek(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0, off; + int err = -EFAULT; + + spin_lock_bh(&vvs->rx_lock); + + list_for_each_entry(pkt, &vvs->rx_queue, list) { + off = pkt->off; + + if (total == len) + break; + + while (total < len && off < pkt->len) { + bytes = len - total; + if (bytes > pkt->len - off) + bytes = pkt->len - off; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + + err = memcpy_to_msg(msg, pkt->buf + off, bytes); + if (err) + goto out; + + spin_lock_bh(&vvs->rx_lock); + + total += bytes; + off += bytes; + } + } + + spin_unlock_bh(&vvs->rx_lock); + + return total; + +out: + if (total) + err = total; + return err; +} + +static ssize_t virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len) @@ -339,9 +388,9 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk, size_t len, int flags) { if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); + return virtio_transport_stream_do_peek(vsk, msg, len); + else + return virtio_transport_stream_do_dequeue(vsk, msg, len); } EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4453dd375de9..d1451e731bb8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8265,10 +8265,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, /* leave request id zero for legacy request * or if driver does not support multi-scheduled scan */ - if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) { - while (!sched_scan_req->reqid) - sched_scan_req->reqid = cfg80211_assign_cookie(rdev); - } + if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) + sched_scan_req->reqid = cfg80211_assign_cookie(rdev); err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d9be26b4edd..4df11ddb9c75 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -4,55 +4,53 @@ BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools # List of programs to build -hostprogs-y := test_lru_dist -hostprogs-y += sock_example -hostprogs-y += fds_example -hostprogs-y += sockex1 -hostprogs-y += sockex2 -hostprogs-y += sockex3 -hostprogs-y += tracex1 -hostprogs-y += tracex2 -hostprogs-y += tracex3 -hostprogs-y += tracex4 -hostprogs-y += tracex5 -hostprogs-y += tracex6 -hostprogs-y += tracex7 -hostprogs-y += test_probe_write_user -hostprogs-y += trace_output -hostprogs-y += lathist -hostprogs-y += offwaketime -hostprogs-y += spintest -hostprogs-y += map_perf_test -hostprogs-y += test_overhead -hostprogs-y += test_cgrp2_array_pin -hostprogs-y += test_cgrp2_attach -hostprogs-y += test_cgrp2_sock -hostprogs-y += test_cgrp2_sock2 -hostprogs-y += xdp1 -hostprogs-y += xdp2 -hostprogs-y += xdp_router_ipv4 -hostprogs-y += test_current_task_under_cgroup -hostprogs-y += trace_event -hostprogs-y += sampleip -hostprogs-y += tc_l2_redirect -hostprogs-y += lwt_len_hist -hostprogs-y += xdp_tx_iptunnel -hostprogs-y += test_map_in_map -hostprogs-y += per_socket_stats_example -hostprogs-y += xdp_redirect -hostprogs-y += xdp_redirect_map -hostprogs-y += xdp_redirect_cpu -hostprogs-y += xdp_monitor -hostprogs-y += xdp_rxq_info -hostprogs-y += syscall_tp -hostprogs-y += cpustat -hostprogs-y += xdp_adjust_tail -hostprogs-y += xdpsock -hostprogs-y += xdp_fwd -hostprogs-y += task_fd_query -hostprogs-y += xdp_sample_pkts -hostprogs-y += ibumad -hostprogs-y += hbm +tprogs-y := test_lru_dist +tprogs-y += sock_example +tprogs-y += fds_example +tprogs-y += sockex1 +tprogs-y += sockex2 +tprogs-y += sockex3 +tprogs-y += tracex1 +tprogs-y += tracex2 +tprogs-y += tracex3 +tprogs-y += tracex4 +tprogs-y += tracex5 +tprogs-y += tracex6 +tprogs-y += tracex7 +tprogs-y += test_probe_write_user +tprogs-y += trace_output +tprogs-y += lathist +tprogs-y += offwaketime +tprogs-y += spintest +tprogs-y += map_perf_test +tprogs-y += test_overhead +tprogs-y += test_cgrp2_array_pin +tprogs-y += test_cgrp2_attach +tprogs-y += test_cgrp2_sock +tprogs-y += test_cgrp2_sock2 +tprogs-y += xdp1 +tprogs-y += xdp2 +tprogs-y += xdp_router_ipv4 +tprogs-y += test_current_task_under_cgroup +tprogs-y += trace_event +tprogs-y += sampleip +tprogs-y += tc_l2_redirect +tprogs-y += lwt_len_hist +tprogs-y += xdp_tx_iptunnel +tprogs-y += test_map_in_map +tprogs-y += xdp_redirect_map +tprogs-y += xdp_redirect_cpu +tprogs-y += xdp_monitor +tprogs-y += xdp_rxq_info +tprogs-y += syscall_tp +tprogs-y += cpustat +tprogs-y += xdp_adjust_tail +tprogs-y += xdpsock +tprogs-y += xdp_fwd +tprogs-y += task_fd_query +tprogs-y += xdp_sample_pkts +tprogs-y += ibumad +tprogs-y += hbm # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -111,7 +109,7 @@ ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs -always := $(hostprogs-y) +always := $(tprogs-y) always += sockex1_kern.o always += sockex2_kern.o always += sockex3_kern.o @@ -145,7 +143,6 @@ always += sampleip_kern.o always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o always += test_map_in_map_kern.o -always += cookie_uid_helper_example.o always += tcp_synrto_kern.o always += tcp_rwnd_kern.o always += tcp_bufs_kern.o @@ -171,20 +168,38 @@ always += ibumad_kern.o always += hbm_out_kern.o always += hbm_edt_kern.o -KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include -KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ -KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ -KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include -KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf +ifeq ($(ARCH), arm) +# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux +# headers when arm instruction set identification is requested. +ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS)) +BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR) +TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR) +endif + +TPROGS_CFLAGS += -Wall -O2 +TPROGS_CFLAGS += -Wmissing-prototypes +TPROGS_CFLAGS += -Wstrict-prototypes + +TPROGS_CFLAGS += -I$(objtree)/usr/include +TPROGS_CFLAGS += -I$(srctree)/tools/lib/bpf/ +TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +TPROGS_CFLAGS += -I$(srctree)/tools/lib/ +TPROGS_CFLAGS += -I$(srctree)/tools/include +TPROGS_CFLAGS += -I$(srctree)/tools/perf -HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +ifdef SYSROOT +TPROGS_CFLAGS += --sysroot=$(SYSROOT) +TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib +endif + +TPROGCFLAGS_bpf_load.o += -Wno-unused-variable -KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf -HOSTLDLIBS_tracex4 += -lrt -HOSTLDLIBS_trace_output += -lrt -HOSTLDLIBS_map_perf_test += -lrt -HOSTLDLIBS_test_overhead += -lrt -HOSTLDLIBS_xdpsock += -pthread +TPROGS_LDLIBS += $(LIBBPF) -lelf +TPROGLDLIBS_tracex4 += -lrt +TPROGLDLIBS_trace_output += -lrt +TPROGLDLIBS_map_perf_test += -lrt +TPROGLDLIBS_test_overhead += -lrt +TPROGLDLIBS_xdpsock += -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang @@ -195,15 +210,14 @@ BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler ifdef CROSS_COMPILE -HOSTCC = $(CROSS_COMPILE)gcc -CLANG_ARCH_ARGS = -target $(ARCH) +CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%)) endif # Don't evaluate probes and warnings if we need to run make recursively ifneq ($(src),) -HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \ - $(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \ - echo okay) +HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \ + $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ + -o /dev/null 2>/dev/null && echo okay) ifeq ($(HDR_PROBE),) $(warning WARNING: Detected possible issues with include path.) @@ -219,10 +233,10 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ /bin/rm -f ./llvm_btf_verify.o) ifneq ($(BTF_LLVM_PROBE),) - EXTRA_CFLAGS += -g + BPF_EXTRA_CFLAGS += -g else ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) - EXTRA_CFLAGS += -g + BPF_EXTRA_CFLAGS += -g LLC_FLAGS += -mattr=dwarfris DWARF2BTF = y endif @@ -239,7 +253,8 @@ clean: $(LIBBPF): FORCE # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O= + $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O= $(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE $(call filechk,offsets,__SYSCALL_NRS_H__) @@ -276,13 +291,16 @@ $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h $(obj)/hbm.o: $(src)/hbm.h $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h +-include $(BPF_SAMPLES_PATH)/Makefile.target + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ - $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ - -I$(srctree)/tools/testing/selftests/bpf/ \ + $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ + -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ + -I$(srctree)/tools/lib/bpf/ \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target new file mode 100644 index 000000000000..7621f55e2947 --- /dev/null +++ b/samples/bpf/Makefile.target @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0 +# ========================================================================== +# Building binaries on the host system +# Binaries are not used during the compilation of the kernel, and intended +# to be build for target board, target board can be host of course. Added to +# build binaries to run not on host system. +# +# Sample syntax +# tprogs-y := xsk_example +# Will compile xsk_example.c and create an executable named xsk_example +# +# tprogs-y := xdpsock +# xdpsock-objs := xdpsock_1.o xdpsock_2.o +# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable +# xdpsock, based on xdpsock_1.o and xdpsock_2.o +# +# Derived from scripts/Makefile.host +# +__tprogs := $(sort $(tprogs-y)) + +# C code +# Executables compiled from a single .c file +tprog-csingle := $(foreach m,$(__tprogs), \ + $(if $($(m)-objs),,$(m))) + +# C executables linked based on several .o files +tprog-cmulti := $(foreach m,$(__tprogs),\ + $(if $($(m)-objs),$(m))) + +# Object (.o) files compiled from .c files +tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs))) + +tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle)) +tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti)) +tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs)) + +##### +# Handle options to gcc. Support building with separate output directory + +_tprogc_flags = $(TPROGS_CFLAGS) \ + $(TPROGCFLAGS_$(basetarget).o) + +# $(objtree)/$(obj) for including generated headers from checkin source files +ifeq ($(KBUILD_EXTMOD),) +ifdef building_out_of_srctree +_tprogc_flags += -I $(objtree)/$(obj) +endif +endif + +tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags) + +# Create executable from a single .c file +# tprog-csingle -> Executable +quiet_cmd_tprog-csingle = CC $@ + cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE + $(call if_changed_dep,tprog-csingle) + +# Link an executable based on list of .o files, all plain c +# tprog-cmulti -> executable +quiet_cmd_tprog-cmulti = LD $@ + cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \ + $(addprefix $(obj)/,$($(@F)-objs)) \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-cmulti): $(tprog-cobjs) FORCE + $(call if_changed,tprog-cmulti) +$(call multi_depend, $(tprog-cmulti), , -objs) + +# Create .o file from a single .c file +# tprog-cobjs -> .o +quiet_cmd_tprog-cobjs = CC $@ + cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $< +$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_dep,tprog-cobjs) diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 5f27e4faca50..cc1f00a1ee06 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -14,6 +14,20 @@ Compiling requires having installed: Note that LLVM's tool 'llc' must support target 'bpf', list version and supported targets with command: ``llc --version`` +Clean and configuration +----------------------- + +It can be needed to clean tools, samples or kernel before trying new arch or +after some changes (on demand):: + + make -C tools clean + make -C samples/bpf clean + make clean + +Configure kernel, defconfig for instance:: + + make defconfig + Kernel headers -------------- @@ -68,9 +82,26 @@ It is also possible to point make to the newly compiled 'llc' or Cross compiling samples ----------------------- In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH -environment variables before calling make. This will direct make to build -samples for the cross target. +environment variables before calling make. But do this before clean, +cofiguration and header install steps described above. This will direct make to +build samples for the cross target:: + + export ARCH=arm64 + export CROSS_COMPILE="aarch64-linux-gnu-" + +Headers can be also installed on RFS of target board if need to keep them in +sync (not necessarily and it creates a local "usr/include" directory also):: + + make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install + +Pointing LLC and CLANG is not necessarily if it's installed on HOST and have +in its targets appropriate arm64 arch (usually it has several arches). +Build samples:: + + make samples/bpf/ + +Or build samples with SYSROOT if some header or library is absent in toolchain, +say libelf, providing address to file system containing headers and libs, +can be RFS of target board:: -export ARCH=arm64 -export CROSS_COMPILE="aarch64-linux-gnu-" -make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang + make samples/bpf/ SYSROOT=~/some_sysroot diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h index aa207a2eebbd..4edaf47876ca 100644 --- a/samples/bpf/hbm_kern.h +++ b/samples/bpf/hbm_kern.h @@ -59,21 +59,18 @@ #define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) #define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate)) -struct bpf_map_def SEC("maps") queue_state = { - .type = BPF_MAP_TYPE_CGROUP_STORAGE, - .key_size = sizeof(struct bpf_cgroup_storage_key), - .value_size = sizeof(struct hbm_vqueue), -}; -BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key, - struct hbm_vqueue); - -struct bpf_map_def SEC("maps") queue_stats = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(struct hbm_queue_stats), - .max_entries = 1, -}; -BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct hbm_vqueue); +} queue_state SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct hvm_queue_stats); +} queue_stats SEC(".maps"); struct hbm_pkt_info { int cwnd; diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 2b2ffb97018b..5c11aefbc489 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -9,25 +9,27 @@ #include <linux/version.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" +#include "bpf_tracing.h" #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 -struct bpf_map_def SEC("maps") hash_map = { +struct bpf_map_def_legacy SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = MAX_ENTRIES, }; -struct bpf_map_def SEC("maps") lru_hash_map = { +struct bpf_map_def_legacy SEC("maps") lru_hash_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = 10000, }; -struct bpf_map_def SEC("maps") nocommon_lru_hash_map = { +struct bpf_map_def_legacy SEC("maps") nocommon_lru_hash_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -35,7 +37,7 @@ struct bpf_map_def SEC("maps") nocommon_lru_hash_map = { .map_flags = BPF_F_NO_COMMON_LRU, }; -struct bpf_map_def SEC("maps") inner_lru_hash_map = { +struct bpf_map_def_legacy SEC("maps") inner_lru_hash_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -44,20 +46,20 @@ struct bpf_map_def SEC("maps") inner_lru_hash_map = { .numa_node = 0, }; -struct bpf_map_def SEC("maps") array_of_lru_hashs = { +struct bpf_map_def_legacy SEC("maps") array_of_lru_hashs = { .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, .key_size = sizeof(u32), .max_entries = MAX_NR_CPUS, }; -struct bpf_map_def SEC("maps") percpu_hash_map = { +struct bpf_map_def_legacy SEC("maps") percpu_hash_map = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = MAX_ENTRIES, }; -struct bpf_map_def SEC("maps") hash_map_alloc = { +struct bpf_map_def_legacy SEC("maps") hash_map_alloc = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -65,7 +67,7 @@ struct bpf_map_def SEC("maps") hash_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; -struct bpf_map_def SEC("maps") percpu_hash_map_alloc = { +struct bpf_map_def_legacy SEC("maps") percpu_hash_map_alloc = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -73,7 +75,7 @@ struct bpf_map_def SEC("maps") percpu_hash_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; -struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { +struct bpf_map_def_legacy SEC("maps") lpm_trie_map_alloc = { .type = BPF_MAP_TYPE_LPM_TRIE, .key_size = 8, .value_size = sizeof(long), @@ -81,14 +83,14 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; -struct bpf_map_def SEC("maps") array_map = { +struct bpf_map_def_legacy SEC("maps") array_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = MAX_ENTRIES, }; -struct bpf_map_def SEC("maps") lru_hash_lookup_map = { +struct bpf_map_def_legacy SEC("maps") lru_hash_lookup_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index e7d9a0a3d45b..9cb5207a692f 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -6,6 +6,7 @@ */ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" #include <uapi/linux/ptrace.h> #include <uapi/linux/perf_event.h> #include <linux/version.h> diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c index 6db6b21fdc6d..ef5892377beb 100644 --- a/samples/bpf/parse_ldabs.c +++ b/samples/bpf/parse_ldabs.c @@ -12,6 +12,7 @@ #include <linux/udp.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" #define DEFAULT_PKTGEN_UDP_PORT 9 #define IP_MF 0x2000 diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c index ceabf31079cf..4a190893894f 100644 --- a/samples/bpf/sampleip_kern.c +++ b/samples/bpf/sampleip_kern.c @@ -9,6 +9,7 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/bpf_perf_event.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" #define MAX_IPS 8192 diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c index ed18e9a4909c..f96943f443ab 100644 --- a/samples/bpf/sockex1_kern.c +++ b/samples/bpf/sockex1_kern.c @@ -3,6 +3,7 @@ #include <uapi/linux/if_packet.h> #include <uapi/linux/ip.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" struct bpf_map_def SEC("maps") my_map = { .type = BPF_MAP_TYPE_ARRAY, diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index f2f9dbc021b0..5566fa7d92fa 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -1,5 +1,6 @@ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" #include <uapi/linux/in.h> #include <uapi/linux/if.h> #include <uapi/linux/if_ether.h> diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index c527b57d3ec8..151dd842ecc0 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -6,6 +6,7 @@ */ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" #include <uapi/linux/in.h> #include <uapi/linux/if.h> #include <uapi/linux/if_ether.h> diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c index ce0167d09cdc..6e9478aa2938 100644 --- a/samples/bpf/spintest_kern.c +++ b/samples/bpf/spintest_kern.c @@ -10,6 +10,7 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/perf_event.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct bpf_map_def SEC("maps") my_map = { .type = BPF_MAP_TYPE_HASH, diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 274c884c87fe..ff43341bdfce 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -8,6 +8,7 @@ #include <uapi/linux/filter.h> #include <uapi/linux/pkt_cls.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" /* compiler workaround */ #define _htonl __builtin_bswap32 diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index 42c44d091dd1..4f80cbe74c72 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -11,11 +11,13 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/in6.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" +#include "bpf_tracing.h" #define MAX_NR_PORTS 65536 /* map #0 */ -struct bpf_map_def SEC("maps") port_a = { +struct bpf_map_def_legacy SEC("maps") port_a = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(int), @@ -23,7 +25,7 @@ struct bpf_map_def SEC("maps") port_a = { }; /* map #1 */ -struct bpf_map_def SEC("maps") port_h = { +struct bpf_map_def_legacy SEC("maps") port_h = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(int), @@ -31,7 +33,7 @@ struct bpf_map_def SEC("maps") port_h = { }; /* map #2 */ -struct bpf_map_def SEC("maps") reg_result_h = { +struct bpf_map_def_legacy SEC("maps") reg_result_h = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(int), @@ -39,7 +41,7 @@ struct bpf_map_def SEC("maps") reg_result_h = { }; /* map #3 */ -struct bpf_map_def SEC("maps") inline_result_h = { +struct bpf_map_def_legacy SEC("maps") inline_result_h = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(int), @@ -47,7 +49,7 @@ struct bpf_map_def SEC("maps") inline_result_h = { }; /* map #4 */ /* Test case #0 */ -struct bpf_map_def SEC("maps") a_of_port_a = { +struct bpf_map_def_legacy SEC("maps") a_of_port_a = { .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, .key_size = sizeof(u32), .inner_map_idx = 0, /* map_fd[0] is port_a */ @@ -55,7 +57,7 @@ struct bpf_map_def SEC("maps") a_of_port_a = { }; /* map #5 */ /* Test case #1 */ -struct bpf_map_def SEC("maps") h_of_port_a = { +struct bpf_map_def_legacy SEC("maps") h_of_port_a = { .type = BPF_MAP_TYPE_HASH_OF_MAPS, .key_size = sizeof(u32), .inner_map_idx = 0, /* map_fd[0] is port_a */ @@ -63,7 +65,7 @@ struct bpf_map_def SEC("maps") h_of_port_a = { }; /* map #6 */ /* Test case #2 */ -struct bpf_map_def SEC("maps") h_of_port_h = { +struct bpf_map_def_legacy SEC("maps") h_of_port_h = { .type = BPF_MAP_TYPE_HASH_OF_MAPS, .key_size = sizeof(u32), .inner_map_idx = 1, /* map_fd[1] is port_h */ diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c index 468a66a92ef9..8d2518e68db9 100644 --- a/samples/bpf/test_overhead_kprobe_kern.c +++ b/samples/bpf/test_overhead_kprobe_kern.c @@ -8,6 +8,7 @@ #include <linux/ptrace.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index 3a677c807044..a543358218e6 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -9,6 +9,7 @@ #include <uapi/linux/bpf.h> #include <linux/version.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct bpf_map_def SEC("maps") dnat_map = { .type = BPF_MAP_TYPE_HASH, diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c index 7068fbdde951..8dc18d233a27 100644 --- a/samples/bpf/trace_event_kern.c +++ b/samples/bpf/trace_event_kern.c @@ -10,6 +10,7 @@ #include <uapi/linux/bpf_perf_event.h> #include <uapi/linux/perf_event.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct key_t { char comm[TASK_COMM_LEN]; diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 107da148820f..1a15f6605129 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -9,6 +9,7 @@ #include <uapi/linux/bpf.h> #include <linux/version.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 5e11c20ce5ec..d70b3ea79ea7 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -9,6 +9,7 @@ #include <linux/version.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct bpf_map_def SEC("maps") my_map = { .type = BPF_MAP_TYPE_HASH, diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index ea1d4c19c132..9af546bebfa9 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -9,6 +9,7 @@ #include <linux/version.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct bpf_map_def SEC("maps") my_map = { .type = BPF_MAP_TYPE_HASH, diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c index 6dd8e384de96..2a02cbe9d9a1 100644 --- a/samples/bpf/tracex4_kern.c +++ b/samples/bpf/tracex4_kern.c @@ -8,6 +8,7 @@ #include <linux/version.h> #include <uapi/linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" struct pair { u64 val; diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index 35cb0eed3be5..b3557b21a8fe 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -11,6 +11,7 @@ #include <uapi/linux/unistd.h> #include "syscall_nrs.h" #include "bpf_helpers.h" +#include "bpf_tracing.h" #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c index 411fdb21f8bc..c616508befb9 100644 --- a/samples/bpf/xdp_adjust_tail_kern.c +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -25,6 +25,9 @@ #define ICMP_TOOBIG_SIZE 98 #define ICMP_TOOBIG_PAYLOAD_SIZE 92 +/* volatile to prevent compiler optimizations */ +static volatile __u32 max_pcktsz = MAX_PCKT_SIZE; + struct bpf_map_def SEC("maps") icmpcnt = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), @@ -92,7 +95,7 @@ static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) orig_iph = data + off; icmp_hdr->type = ICMP_DEST_UNREACH; icmp_hdr->code = ICMP_FRAG_NEEDED; - icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); + icmp_hdr->un.frag.mtu = htons(max_pcktsz - sizeof(struct ethhdr)); icmp_hdr->checksum = 0; ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); icmp_hdr->checksum = csum; @@ -121,7 +124,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp) int pckt_size = data_end - data; int offset; - if (pckt_size > MAX_PCKT_SIZE) { + if (pckt_size > max(max_pcktsz, ICMP_TOOBIG_SIZE)) { offset = pckt_size - ICMP_TOOBIG_SIZE; if (bpf_xdp_adjust_tail(xdp, 0 - offset)) return XDP_PASS; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index a3596b617c4c..d86e9ad0356b 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -23,6 +23,7 @@ #include "libbpf.h" #define STATS_INTERVAL_S 2U +#define MAX_PCKT_SIZE 600 static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -72,6 +73,7 @@ static void usage(const char *cmd) printf("Usage: %s [...]\n", cmd); printf(" -i <ifname|ifindex> Interface\n"); printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); + printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); printf(" -F force loading prog\n"); @@ -85,13 +87,14 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; unsigned char opt_flags[256] = {}; - const char *optstr = "i:T:SNFh"; + const char *optstr = "i:T:P:SNFh"; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; struct bpf_object *obj; - struct bpf_map *map; + __u32 max_pckt_size = 0; + __u32 key = 0; char filename[256]; int err; @@ -110,6 +113,9 @@ int main(int argc, char **argv) case 'T': kill_after_s = atoi(optarg); break; + case 'P': + max_pckt_size = atoi(optarg); + break; case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; @@ -150,15 +156,20 @@ int main(int argc, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - map = bpf_map__next(NULL, obj); - if (!map) { - printf("finding a map in obj file failed\n"); - return 1; + /* static global var 'max_pcktsz' is accessible from .data section */ + if (max_pckt_size) { + map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data"); + if (map_fd < 0) { + printf("finding a max_pcktsz map in obj file failed\n"); + return 1; + } + bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY); } - map_fd = bpf_map__fd(map); - if (!prog_fd) { - printf("load_bpf_file: %s\n", strerror(errno)); + /* fetch icmpcnt map */ + map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt"); + if (map_fd < 0) { + printf("finding a icmpcnt map in obj file failed\n"); return 1; } diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index df011ac33402..405c4e091f8b 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -378,7 +378,7 @@ static void usage(const char *prog) " -q, --queue=n Use queue n (default 0)\n" " -p, --poll Use poll syscall\n" " -S, --xdp-skb=n Use XDP skb-mod\n" - " -N, --xdp-native=n Enfore XDP native mode\n" + " -N, --xdp-native=n Enforce XDP native mode\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst index fd39215db508..3f6483e8b2df 100644 --- a/samples/pktgen/README.rst +++ b/samples/pktgen/README.rst @@ -18,7 +18,7 @@ across the sample scripts. Usage example is printed on errors:: Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX -i : ($DEV) output interface/device (required) -s : ($PKT_SIZE) packet size - -d : ($DEST_IP) destination IP + -d : ($DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed -m : ($DST_MAC) destination MAC-addr -p : ($DST_PORT) destination PORT range (e.g. 433-444) is also allowed -t : ($THREADS) threads to start diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 4af4046d71be..dae06d5b38fa 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -5,6 +5,8 @@ # Author: Jesper Dangaaard Brouer # License: GPL +set -o errexit + ## -- General shell logging cmds -- function err() { local exitcode=$1 @@ -58,6 +60,7 @@ function pg_set() { function proc_cmd() { local result local proc_file=$1 + local status=0 # after shift, the remaining args are contained in $@ shift local proc_ctrl=${PROC_DIR}/$proc_file @@ -73,13 +76,13 @@ function proc_cmd() { echo "cmd: $@ > $proc_ctrl" fi # Quoting of "$@" is important for space expansion - echo "$@" > "$proc_ctrl" - local status=$? + echo "$@" > "$proc_ctrl" || status=$? - result=$(grep "Result: OK:" $proc_ctrl) - # Due to pgctrl, cannot use exit code $? from grep - if [[ "$result" == "" ]]; then - grep "Result:" $proc_ctrl >&2 + if [[ "$proc_file" != "pgctrl" ]]; then + result=$(grep "Result: OK:" $proc_ctrl) || true + if [[ "$result" == "" ]]; then + grep "Result:" $proc_ctrl >&2 + fi fi if (( $status != 0 )); then err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\"" @@ -105,6 +108,8 @@ function pgset() { fi } +[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT + ## -- General shell tricks -- function root_check_run_with_sudo() { @@ -163,6 +168,137 @@ function get_node_cpus() echo $node_cpu_list } +# Check $1 is in between $2, $3 ($2 <= $1 <= $3) +function in_between() { [[ ($1 -ge $2) && ($1 -le $3) ]] ; } + +# Extend shrunken IPv6 address. +# fe80::42:bcff:fe84:e10a => fe80:0:0:0:42:bcff:fe84:e10a +function extend_addr6() +{ + local addr=$1 + local sep=: sep2=:: + local sep_cnt=$(tr -cd $sep <<< $1 | wc -c) + local shrink + + # separator count should be (2 <= $sep_cnt <= 7) + if ! (in_between $sep_cnt 2 7); then + err 5 "Invalid IP6 address: $1" + fi + + # if shrink '::' occurs multiple, it's malformed. + shrink=( $(egrep -o "$sep{2,}" <<< $addr) ) + if [[ ${#shrink[@]} -ne 0 ]]; then + if [[ ${#shrink[@]} -gt 1 || ( ${shrink[0]} != $sep2 ) ]]; then + err 5 "Invalid IP6 address: $1" + fi + fi + + # add 0 at begin & end, and extend addr by adding :0 + [[ ${addr:0:1} == $sep ]] && addr=0${addr} + [[ ${addr: -1} == $sep ]] && addr=${addr}0 + echo "${addr/$sep2/$(printf ':0%.s' $(seq $[8-sep_cnt])):}" +} + +# Given a single IP(v4/v6) address, whether it is valid. +function validate_addr() +{ + # check function is called with (funcname)6 + [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6 + local bitlen=$[ IP6 ? 128 : 32 ] + local len=$[ IP6 ? 8 : 4 ] + local max=$[ 2**(len*2)-1 ] + local net prefix + local addr sep + + IFS='/' read net prefix <<< $1 + [[ $IP6 ]] && net=$(extend_addr6 $net) + + # if prefix exists, check (0 <= $prefix <= $bitlen) + if [[ -n $prefix ]]; then + if ! (in_between $prefix 0 $bitlen); then + err 5 "Invalid prefix: /$prefix" + fi + fi + + # set separator for each IP(v4/v6) + [[ $IP6 ]] && sep=: || sep=. + IFS=$sep read -a addr <<< $net + + # array length + if [[ ${#addr[@]} != $len ]]; then + err 5 "Invalid IP$IP6 address: $1" + fi + + # check each digit (0 <= $digit <= $max) + for digit in "${addr[@]}"; do + [[ $IP6 ]] && digit=$[ 16#$digit ] + if ! (in_between $digit 0 $max); then + err 5 "Invalid IP$IP6 address: $1" + fi + done + + return 0 +} + +function validate_addr6() { validate_addr $@ ; } + +# Given a single IP(v4/v6) or CIDR, return minimum and maximum IP addr. +function parse_addr() +{ + # check function is called with (funcname)6 + [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6 + local net prefix + local min_ip max_ip + + IFS='/' read net prefix <<< $1 + [[ $IP6 ]] && net=$(extend_addr6 $net) + + if [[ -z $prefix ]]; then + min_ip=$net + max_ip=$net + else + # defining array for converting Decimal 2 Binary + # 00000000 00000001 00000010 00000011 00000100 ... + local d2b='{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}' + [[ $IP6 ]] && d2b+=$d2b + eval local D2B=($d2b) + + local bitlen=$[ IP6 ? 128 : 32 ] + local remain=$[ bitlen-prefix ] + local octet=$[ IP6 ? 16 : 8 ] + local min_mask max_mask + local min max + local ip_bit + local ip sep + + # set separator for each IP(v4/v6) + [[ $IP6 ]] && sep=: || sep=. + IFS=$sep read -ra ip <<< $net + + min_mask="$(printf '1%.s' $(seq $prefix))$(printf '0%.s' $(seq $remain))" + max_mask="$(printf '0%.s' $(seq $prefix))$(printf '1%.s' $(seq $remain))" + + # calculate min/max ip with &,| operator + for i in "${!ip[@]}"; do + digit=$[ IP6 ? 16#${ip[$i]} : ${ip[$i]} ] + ip_bit=${D2B[$digit]} + + idx=$[ octet*i ] + min[$i]=$[ 2#$ip_bit & 2#${min_mask:$idx:$octet} ] + max[$i]=$[ 2#$ip_bit | 2#${max_mask:$idx:$octet} ] + [[ $IP6 ]] && { min[$i]=$(printf '%X' ${min[$i]}); + max[$i]=$(printf '%X' ${max[$i]}); } + done + + min_ip=$(IFS=$sep; echo "${min[*]}") + max_ip=$(IFS=$sep; echo "${max[*]}") + fi + + echo $min_ip $max_ip +} + +function parse_addr6() { parse_addr $@ ; } + # Given a single or range of port(s), return minimum and maximum port number. function parse_ports() { @@ -185,9 +321,9 @@ function validate_ports() local min_port=$1 local max_port=$2 - # 0 < port < 65536 - if [[ $min_port -gt 0 && $min_port -lt 65536 ]]; then - if [[ $max_port -gt 0 && $max_port -lt 65536 ]]; then + # 1 <= port <= 65535 + if (in_between $min_port 1 65535); then + if (in_between $max_port 1 65535); then if [[ $min_port -le $max_port ]]; then return 0 fi diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh index a06b00a0c7b6..ff0ed474fee9 100644 --- a/samples/pktgen/parameters.sh +++ b/samples/pktgen/parameters.sh @@ -8,7 +8,7 @@ function usage() { echo "Usage: $0 [-vx] -i ethX" echo " -i : (\$DEV) output interface/device (required)" echo " -s : (\$PKT_SIZE) packet size" - echo " -d : (\$DEST_IP) destination IP" + echo " -d : (\$DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed" echo " -m : (\$DST_MAC) destination MAC-addr" echo " -p : (\$DST_PORT) destination PORT range (e.g. 433-444) is also allowed" echo " -t : (\$THREADS) threads to start" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index e14b1a9144d9..1b6204125d2d 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -41,9 +41,13 @@ fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=1024 [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -71,13 +75,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Inject packet into RX path of stack diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index 82c3e504e056..e607cb369b20 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -24,9 +24,13 @@ if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi [ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -54,13 +58,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Inject packet into TX qdisc egress path of stack diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index d1702fdde8f3..a4e250b45dce 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -22,17 +22,21 @@ fi # Example enforce param "-m" for dst_mac [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config DELAY="0" # Zero means max speed # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 # General cleanup everything since last run # (especially important if other threads were configured by other scripts) @@ -61,19 +65,20 @@ pg_set $DEV "flag NO_TIMESTAMP" # Destination pg_set $DEV "dst_mac $DST_MAC" -pg_set $DEV "dst$IP6 $DEST_IP" +pg_set $DEV "dst${IP6}_min $DST_MIN" +pg_set $DEV "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $DEV "flag UDPDST_RND" - pg_set $DEV "udp_dst_min $DST_MIN" - pg_set $DEV "udp_dst_max $DST_MAX" + pg_set $DEV "udp_dst_min $UDP_DST_MIN" + pg_set $DEV "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $DEV "flag UDPSRC_RND" -pg_set $DEV "udp_src_min $UDP_MIN" -pg_set $DEV "udp_src_max $UDP_MAX" +pg_set $DEV "udp_src_min $UDP_SRC_MIN" +pg_set $DEV "udp_src_max $UDP_SRC_MAX" # start_run echo "Running... ctrl^C to stop" >&2 diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index 7f7a9a27548f..cb2495fcdc60 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -21,17 +21,21 @@ DELAY="0" # Zero means max speed [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 # (example of setting default params in your script) if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # General cleanup everything since last run @@ -62,19 +66,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $dev "flag UDPSRC_RND" - pg_set $dev "udp_src_min $UDP_MIN" - pg_set $dev "udp_src_max $UDP_MAX" + pg_set $dev "udp_src_min $UDP_SRC_MIN" + pg_set $dev "udp_src_max $UDP_SRC_MAX" done # start_run diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index b520637817ce..fff50765a5aa 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -33,9 +33,13 @@ fi [ -z "$BURST" ] && BURST=32 [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -62,13 +66,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup burst, for easy testing -b 0 disable bursting diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index 5b6e9d9cb5b5..2cd6b701400d 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -17,9 +17,13 @@ source ${basedir}/parameters.sh [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # NOTICE: Script specific settings @@ -37,6 +41,9 @@ if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi +# 198.18.0.0 / 198.19.255.255 +read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15) + # General cleanup everything since last run pg_ctrl "reset" @@ -58,19 +65,20 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst_min $DST_MIN" + pg_set $dev "dst_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Randomize source IP-addresses pg_set $dev "flag IPSRC_RND" - pg_set $dev "src_min 198.18.0.0" - pg_set $dev "src_max 198.19.255.255" + pg_set $dev "src_min $SRC_MIN" + pg_set $dev "src_max $SRC_MAX" # Limit number of flows (max 65535) pg_set $dev "flows $FLOWS" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 0c06e63fbe97..4cb6252ade39 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -22,9 +22,13 @@ source ${basedir}/parameters.sh [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely +if [ -n "$DEST_IP" ]; then + validate_addr $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # Base Config @@ -51,13 +55,14 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst_min $DST_MIN" + pg_set $dev "dst_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup source IP-addresses based on thread number diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh index 97f0266c0356..728106060a02 100755 --- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -20,8 +20,8 @@ DELAY="0" # Zero means max speed [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max -UDP_MIN=9 -UDP_MAX=109 +UDP_SRC_MIN=9 +UDP_SRC_MAX=109 node=`get_iface_node $DEV` irq_array=(`get_iface_irqs $DEV`) @@ -35,9 +35,13 @@ if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +if [ -n "$DEST_IP" ]; then + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) +fi if [ -n "$DST_PORT" ]; then - read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT) - validate_ports $DST_MIN $DST_MAX + read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) + validate_ports $UDP_DST_MIN $UDP_DST_MAX fi # General cleanup everything since last run @@ -79,19 +83,20 @@ for ((i = 0; i < $THREADS; i++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst$IP6 $DEST_IP" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range pg_set $dev "flag UDPDST_RND" - pg_set $dev "udp_dst_min $DST_MIN" - pg_set $dev "udp_dst_max $DST_MAX" + pg_set $dev "udp_dst_min $UDP_DST_MIN" + pg_set $dev "udp_dst_max $UDP_DST_MAX" fi # Setup random UDP port src range pg_set $dev "flag UDPSRC_RND" - pg_set $dev "udp_src_min $UDP_MIN" - pg_set $dev "udp_src_max $UDP_MAX" + pg_set $dev "udp_src_min $UDP_SRC_MIN" + pg_set $dev "udp_src_max $UDP_SRC_MAX" done # start_run diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 894cc58c1a03..7df9ce598ff9 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -391,6 +391,154 @@ SEE ALSO print('') +class PrinterHelpers(Printer): + """ + A printer for dumping collected information about helpers as C header to + be included from BPF program. + @helpers: array of Helper objects to print to standard output + """ + + type_fwds = [ + 'struct bpf_fib_lookup', + 'struct bpf_perf_event_data', + 'struct bpf_perf_event_value', + 'struct bpf_sock', + 'struct bpf_sock_addr', + 'struct bpf_sock_ops', + 'struct bpf_sock_tuple', + 'struct bpf_spin_lock', + 'struct bpf_sysctl', + 'struct bpf_tcp_sock', + 'struct bpf_tunnel_key', + 'struct bpf_xfrm_state', + 'struct pt_regs', + 'struct sk_reuseport_md', + 'struct sockaddr', + 'struct tcphdr', + + 'struct __sk_buff', + 'struct sk_msg_md', + 'struct xdp_md', + ] + known_types = { + '...', + 'void', + 'const void', + 'char', + 'const char', + 'int', + 'long', + 'unsigned long', + + '__be16', + '__be32', + '__wsum', + + 'struct bpf_fib_lookup', + 'struct bpf_perf_event_data', + 'struct bpf_perf_event_value', + 'struct bpf_sock', + 'struct bpf_sock_addr', + 'struct bpf_sock_ops', + 'struct bpf_sock_tuple', + 'struct bpf_spin_lock', + 'struct bpf_sysctl', + 'struct bpf_tcp_sock', + 'struct bpf_tunnel_key', + 'struct bpf_xfrm_state', + 'struct pt_regs', + 'struct sk_reuseport_md', + 'struct sockaddr', + 'struct tcphdr', + } + mapped_types = { + 'u8': '__u8', + 'u16': '__u16', + 'u32': '__u32', + 'u64': '__u64', + 's8': '__s8', + 's16': '__s16', + 's32': '__s32', + 's64': '__s64', + 'size_t': 'unsigned long', + 'struct bpf_map': 'void', + 'struct sk_buff': 'struct __sk_buff', + 'const struct sk_buff': 'const struct __sk_buff', + 'struct sk_msg_buff': 'struct sk_msg_md', + 'struct xdp_buff': 'struct xdp_md', + } + + def print_header(self): + header = '''\ +/* This is auto-generated file. See bpf_helpers_doc.py for details. */ + +/* Forward declarations of BPF structs */''' + + print(header) + for fwd in self.type_fwds: + print('%s;' % fwd) + print('') + + def print_footer(self): + footer = '' + print(footer) + + def map_type(self, t): + if t in self.known_types: + return t + if t in self.mapped_types: + return self.mapped_types[t] + print("") + print("Unrecognized type '%s', please add it to known types!" % t) + sys.exit(1) + + seen_helpers = set() + + def print_one(self, helper): + proto = helper.proto_break_down() + + if proto['name'] in self.seen_helpers: + return + self.seen_helpers.add(proto['name']) + + print('/*') + print(" * %s" % proto['name']) + print(" *") + if (helper.desc): + # Do not strip all newline characters: formatted code at the end of + # a section must be followed by a blank line. + for line in re.sub('\n$', '', helper.desc, count=1).split('\n'): + print(' *{}{}'.format(' \t' if line else '', line)) + + if (helper.ret): + print(' *') + print(' * Returns') + for line in helper.ret.rstrip().split('\n'): + print(' *{}{}'.format(' \t' if line else '', line)) + + print(' */') + print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']), + proto['ret_star'], proto['name']), end='') + comma = '' + for i, a in enumerate(proto['args']): + t = a['type'] + n = a['name'] + if proto['name'] == 'bpf_get_socket_cookie' and i == 0: + t = 'void' + n = 'ctx' + one_arg = '{}{}'.format(comma, self.map_type(t)) + if n: + if a['star']: + one_arg += ' {}'.format(a['star']) + else: + one_arg += ' ' + one_arg += '{}'.format(n) + comma = ', ' + print(one_arg, end='') + + print(') = (void *) %d;' % len(self.seen_helpers)) + print('') + ############################################################################### # If script is launched from scripts/ from kernel tree and can access @@ -405,6 +553,8 @@ Parse eBPF header file and generate documentation for eBPF helper functions. The RST-formatted output produced can be turned into a manual page with the rst2man utility. """) +argParser.add_argument('--header', action='store_true', + help='generate C header file') if (os.path.isfile(bpfh)): argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h', default=bpfh) @@ -417,5 +567,8 @@ headerParser = HeaderParser(args.filename) headerParser.run() # Print formatted output to standard output. -printer = PrinterRST(headerParser.helpers) +if args.header: + printer = PrinterHelpers(headerParser.helpers) +else: + printer = PrinterRST(headerParser.helpers) printer.print_all() diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 58345ba0528e..c97fdae8f71b 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -83,6 +83,8 @@ static const struct nlmsg_perm nlmsg_route_perms[] = { RTM_NEWNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_NEWLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, }; static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = @@ -166,7 +168,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ - BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOP + 3)); + BUILD_BUG_ON(RTM_MAX != (RTM_NEWLINKPROP + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 93d008687020..4764581ff9ea 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -27,7 +27,7 @@ bool json_output; bool show_pinned; bool block_mount; bool verifier_logs; -int bpf_flags; +bool relaxed_maps; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -396,7 +396,7 @@ int main(int argc, char **argv) show_pinned = true; break; case 'm': - bpf_flags = MAPS_RELAX_COMPAT; + relaxed_maps = true; break; case 'n': block_mount = true; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index af9ad56c303a..2899095f8254 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -94,7 +94,7 @@ extern bool json_output; extern bool show_pinned; extern bool block_mount; extern bool verifier_logs; -extern int bpf_flags; +extern bool relaxed_maps; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 43fdbbfe41bb..27da96a797ab 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1092,9 +1092,7 @@ free_data_in: static int load_with_options(int argc, char **argv, bool first_prog_only) { struct bpf_object_load_attr load_attr = { 0 }; - struct bpf_object_open_attr open_attr = { - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; + enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; @@ -1105,11 +1103,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) const char *pinfile; unsigned int i, j; __u32 ifindex = 0; + const char *file; int idx, err; + LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = relaxed_maps, + ); + if (!REQ_ARGS(2)) return -1; - open_attr.file = GET_ARG(); + file = GET_ARG(); pinfile = GET_ARG(); while (argc) { @@ -1118,7 +1121,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) NEXT_ARG(); - if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) { + if (common_prog_type != BPF_PROG_TYPE_UNSPEC) { p_err("program type already specified"); goto err_free_reuse_maps; } @@ -1135,8 +1138,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) strcat(type, *argv); strcat(type, "/"); - err = libbpf_prog_type_by_name(type, - &open_attr.prog_type, + err = libbpf_prog_type_by_name(type, &common_prog_type, &expected_attach_type); free(type); if (err < 0) @@ -1224,16 +1226,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); - obj = __bpf_object__open_xattr(&open_attr, bpf_flags); + obj = bpf_object__open_file(file, &open_opts); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); goto err_free_reuse_maps; } bpf_object__for_each_program(pos, obj) { - enum bpf_prog_type prog_type = open_attr.prog_type; + enum bpf_prog_type prog_type = common_prog_type; - if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + if (prog_type == BPF_PROG_TYPE_UNSPEC) { const char *sec_name = bpf_program__title(pos, false); err = libbpf_prog_type_by_name(sec_name, &prog_type, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77c6be96d676..a65c3b0c6935 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -794,7 +794,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(char *buf, u32 size_of_buf) + * int bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -1023,7 +1023,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1068,7 +1068,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) + * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1085,7 +1085,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) + * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1154,7 +1154,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1172,7 +1172,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * 0 * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1595,7 +1595,7 @@ union bpf_attr { * Return * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. * - * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1715,7 +1715,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1947,7 +1947,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -1980,7 +1980,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2033,7 +2033,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) + * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2392,7 +2392,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2408,9 +2408,9 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description - * Will remove *pop* bytes from a *msg* starting at byte *start*. + * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation @@ -2505,7 +2505,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buf *skb) + * int bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index d9e9dec04605..35bf013e368c 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -3,3 +3,7 @@ libbpf.pc FEATURE-DUMP.libbpf test_libbpf libbpf.so.* +TAGS +tags +cscope.* +/bpf_helper_defs.h diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 56ce6292071b..bfeafb9d2d49 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -143,6 +143,8 @@ LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) +TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) + GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ @@ -150,22 +152,14 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) - -CXX_TEST_TARGET = $(OUTPUT)test_libbpf - -ifeq ($(feature-cxx), 1) - CMD_TARGETS += $(CXX_TEST_TARGET) -endif - -TARGETS = $(CMD_TARGETS) +CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) $(OUTPUT)test_libbpf all: fixdep $(Q)$(MAKE) all_cmd all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep bpfdep +$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -183,22 +177,27 @@ $(BPF_IN_SHARED): force elfdep bpfdep echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep bpfdep +$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) +bpf_helper_defs.h: $(srctree)/include/uapi/linux/bpf.h + $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + --file $(srctree)/include/uapi/linux/bpf.h > bpf_helper_defs.h + $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) - $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + $(QUIET_LINK)$(CC) $(LDFLAGS) \ + --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) $(OUTPUT)libbpf.a: $(BPF_IN_STATIC) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a - $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ +$(OUTPUT)test_libbpf: test_libbpf.c $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(INCLUDES) $^ -lelf -o $@ $(OUTPUT)libbpf.pc: $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ @@ -247,13 +246,18 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -install_headers: +install_headers: bpf_helper_defs.h $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ - $(call do_install,xsk.h,$(prefix)/include/bpf,644); + $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644); install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ @@ -268,14 +272,15 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \ + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR) + *.pc LIBBPF-CFLAGS bpf_helper_defs.h \ + $(SHARED_OBJDIR) $(STATIC_OBJDIR) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -PHONY += force elfdep bpfdep +PHONY += force elfdep bpfdep cscope tags force: elfdep: @@ -284,6 +289,14 @@ elfdep: bpfdep: @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi +cscope: + ls *.c *.h > cscope.files + cscope -b -q -I $(srctree)/include -f cscope.out + +tags: + rm -f TAGS tags + ls *.c *.h | xargs $(TAGS_PROG) -a + # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h new file mode 100644 index 000000000000..4daf04c25493 --- /dev/null +++ b/tools/lib/bpf/bpf_core_read.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_CORE_READ_H__ +#define __BPF_CORE_READ_H__ + +/* + * bpf_core_read() abstracts away bpf_probe_read() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * built-in, provided by Clang. + * + * __builtin_preserve_access_index() takes as an argument an expression of + * taking an address of a field within struct/union. It makes compiler emit + * a relocation, which records BTF type ID describing root struct/union and an + * accessor string which describes exact embedded field that was used to take + * an address. See detailed description of this relocation format and + * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h. + * + * This relocation allows libbpf to adjust BPF instruction to use correct + * actual field offset, based on target kernel BTF type that matches original + * (local) BTF, used to record relocation. + */ +#define bpf_core_read(dst, sz, src) \ + bpf_probe_read(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +/* + * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() + * additionally emitting BPF CO-RE field relocation for specified source + * argument. + */ +#define bpf_core_read_str(dst, sz, src) \ + bpf_probe_read_str(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +#define ___concat(a, b) a ## b +#define ___apply(fn, n) ___concat(fn, n) +#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N + +/* + * return number of provided arguments; used for switch-based variadic macro + * definitions (see ___last, ___arrow, etc below) + */ +#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* + * return 0 if no arguments are passed, N - otherwise; used for + * recursively-defined macros to specify termination (0) case, and generic + * (N) case (e.g., ___read_ptrs, ___core_read) + */ +#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___last1(x) x +#define ___last2(a, x) x +#define ___last3(a, b, x) x +#define ___last4(a, b, c, x) x +#define ___last5(a, b, c, d, x) x +#define ___last6(a, b, c, d, e, x) x +#define ___last7(a, b, c, d, e, f, x) x +#define ___last8(a, b, c, d, e, f, g, x) x +#define ___last9(a, b, c, d, e, f, g, h, x) x +#define ___last10(a, b, c, d, e, f, g, h, i, x) x +#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___nolast2(a, _) a +#define ___nolast3(a, b, _) a, b +#define ___nolast4(a, b, c, _) a, b, c +#define ___nolast5(a, b, c, d, _) a, b, c, d +#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e +#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f +#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g +#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h +#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i +#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___arrow1(a) a +#define ___arrow2(a, b) a->b +#define ___arrow3(a, b, c) a->b->c +#define ___arrow4(a, b, c, d) a->b->c->d +#define ___arrow5(a, b, c, d, e) a->b->c->d->e +#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f +#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g +#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h +#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i +#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j +#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___type(...) typeof(___arrow(__VA_ARGS__)) + +#define ___read(read_fn, dst, src_type, src, accessor) \ + read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) + +/* "recursively" read a sequence of inner pointers using local __t var */ +#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); +#define ___rd_last(...) \ + ___read(bpf_core_read, &__t, \ + ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) +#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___read_ptrs(src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) + +#define ___core_read0(fn, dst, src, a) \ + ___read(fn, dst, ___type(src), src, a); +#define ___core_readN(fn, dst, src, ...) \ + ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ + ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ + ___last(__VA_ARGS__)); +#define ___core_read(fn, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ + src, a, ##__VA_ARGS__) + +/* + * BPF_CORE_READ_INTO() is a more performance-conscious variant of + * BPF_CORE_READ(), in which final field is read into user-provided storage. + * See BPF_CORE_READ() below for more details on general usage. + */ +#define BPF_CORE_READ_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as + * BPF_CORE_READ() for intermediate pointers, but then executes (and returns + * corresponding error code) bpf_core_read_str() for final string read. + */ +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially + * when there are few pointer chasing steps. + * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: + * int x = s->a.b.c->d.e->f->g; + * can be succinctly achieved using BPF_CORE_READ as: + * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); + * + * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF + * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * 1. const void *__t = s->a.b.c; + * 2. __t = __t->d.e; + * 3. __t = __t->f; + * 4. return __t->g; + * + * Equivalence is logical, because there is a heavy type casting/preservation + * involved, as well as all the reads are happening through bpf_probe_read() + * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * + * N.B. Only up to 9 "field accessors" are supported, which should be more + * than enough for any practical purpose. + */ +#define BPF_CORE_READ(src, a, ...) \ + ({ \ + ___type(src, a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + __r; \ + }) + +#endif + diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h index fbe28008450f..fbe28008450f 100644 --- a/tools/testing/selftests/bpf/bpf_endian.h +++ b/tools/lib/bpf/bpf_endian.h diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h new file mode 100644 index 000000000000..2203595f38c3 --- /dev/null +++ b/tools/lib/bpf/bpf_helpers.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_HELPERS__ +#define __BPF_HELPERS__ + +#include "bpf_helper_defs.h" + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* + * Helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +#ifndef __always_inline +#define __always_inline __attribute__((always_inline)) +#endif + +/* + * Helper structure used by eBPF C program + * to describe BPF map attributes to libbpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + +#endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h new file mode 100644 index 000000000000..b0dafe8b4ebc --- /dev/null +++ b/tools/lib/bpf/bpf_tracing.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_H__ +#define __BPF_TRACING_H__ + +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s390) + #define bpf_target_s390 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) + #define bpf_target_arm + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined +#if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390__) + #define bpf_target_s390 +#elif defined(__arm__) + #define bpf_target_arm +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) + +#ifdef __KERNEL__ +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif + +#elif defined(bpf_target_s390) + +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) + +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + +#elif defined(bpf_target_arm64) + +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) + +#elif defined(bpf_target_mips) + +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + +#elif defined(bpf_target_powerpc) + +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) + +#elif defined(bpf_target_sparc) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + +#endif + +#if defined(bpf_target_powerpc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#elif defined(bpf_target_sparc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#else +#define BPF_KPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#endif + +#endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index ede55fec3618..139812b46c7b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -876,7 +876,6 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, __u16 vlen = btf_vlen(t); packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; - align = packed ? 1 : btf_align_of(d->btf, id); btf_dump_printf(d, "%s%s%s {", is_struct ? "struct" : "union", @@ -906,6 +905,13 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ";"); } + /* pad at the end, if necessary */ + if (is_struct) { + align = packed ? 1 : btf_align_of(d->btf, id); + btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, + lvl + 1); + } + if (vlen) btf_dump_printf(d, "\n"); btf_dump_printf(d, "%s}", pfx(lvl)); @@ -969,6 +975,17 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, { const char *name = btf_dump_ident_name(d, id); + /* + * Old GCC versions are emitting invalid typedef for __gnuc_va_list + * pointing to VOID. This generates warnings from btf_dump() and + * results in uncompilable header file, so we are fixing it up here + * with valid typedef into __builtin_va_list. + */ + if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { + btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); + return; + } + btf_dump_printf(d, "typedef "); btf_dump_emit_type_decl(d, t->type, name, lvl); } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e0276520171b..a02cdedc4e3f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include <linux/limits.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> +#include <linux/version.h> #include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -255,7 +256,7 @@ struct bpf_object { */ struct { int fd; - void *obj_buf; + const void *obj_buf; size_t obj_buf_sz; Elf *elf; GElf_Ehdr ehdr; @@ -491,9 +492,21 @@ bpf_object__init_prog_names(struct bpf_object *obj) return 0; } +static __u32 get_kernel_version(void) +{ + __u32 major, minor, patch; + struct utsname info; + + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; + return KERNEL_VERSION(major, minor, patch); +} + static struct bpf_object *bpf_object__new(const char *path, - void *obj_buf, - size_t obj_buf_sz) + const void *obj_buf, + size_t obj_buf_sz, + const char *obj_name) { struct bpf_object *obj; char *end; @@ -505,11 +518,17 @@ static struct bpf_object *bpf_object__new(const char *path, } strcpy(obj->path, path); - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1); - end = strchr(obj->name, '.'); - if (end) - *end = 0; + if (obj_name) { + strncpy(obj->name, obj_name, sizeof(obj->name) - 1); + obj->name[sizeof(obj->name) - 1] = 0; + } else { + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + } obj->efile.fd = -1; /* @@ -526,6 +545,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->kern_version = get_kernel_version(); obj->loaded = false; INIT_LIST_HEAD(&obj->list); @@ -569,7 +589,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). */ - obj->efile.elf = elf_memory(obj->efile.obj_buf, + obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY); @@ -636,21 +656,6 @@ bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) return 0; } -static int -bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) -{ - __u32 kver; - - if (size != sizeof(kver)) { - pr_warning("invalid kver section in %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - memcpy(&kver, data, sizeof(kver)); - obj->kern_version = kver; - pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); - return 0; -} - static int compare_bpf_map(const void *_a, const void *_b) { const struct bpf_map *a = _a; @@ -1568,11 +1573,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) if (err) return err; } else if (strcmp(name, "version") == 0) { - err = bpf_object__init_kversion(obj, - data->d_buf, - data->d_size); - if (err) - return err; + /* skip, we don't need it anymore */ } else if (strcmp(name, "maps") == 0) { obj->efile.maps_shndx = idx; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { @@ -3551,54 +3552,9 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) -{ - switch (type) { - case BPF_PROG_TYPE_SOCKET_FILTER: - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_XDP: - case BPF_PROG_TYPE_CGROUP_SKB: - case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_LWT_IN: - case BPF_PROG_TYPE_LWT_OUT: - case BPF_PROG_TYPE_LWT_XMIT: - case BPF_PROG_TYPE_LWT_SEG6LOCAL: - case BPF_PROG_TYPE_SOCK_OPS: - case BPF_PROG_TYPE_SK_SKB: - case BPF_PROG_TYPE_CGROUP_DEVICE: - case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - case BPF_PROG_TYPE_LIRC_MODE2: - case BPF_PROG_TYPE_SK_REUSEPORT: - case BPF_PROG_TYPE_FLOW_DISSECTOR: - case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_RAW_TRACEPOINT: - case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: - case BPF_PROG_TYPE_PERF_EVENT: - case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: - return false; - case BPF_PROG_TYPE_KPROBE: - default: - return true; - } -} - -static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) -{ - if (needs_kver && obj->kern_version == 0) { - pr_warning("%s doesn't provide kernel version\n", - obj->path); - return -LIBBPF_ERRNO__KVERSION; - } - return 0; -} - static struct bpf_object * -__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, - bool needs_kver, int flags) +__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const char *obj_name, int flags) { struct bpf_object *obj; int err; @@ -3608,7 +3564,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - obj = bpf_object__new(path, obj_buf, obj_buf_sz); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -3617,7 +3573,6 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); - CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); bpf_object__elf_finish(obj); return obj; @@ -3626,8 +3581,8 @@ out: return ERR_PTR(err); } -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, - int flags) +static struct bpf_object * +__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) { /* param validation */ if (!attr->file) @@ -3635,9 +3590,7 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, pr_debug("loading %s\n", attr->file); - return __bpf_object__open(attr->file, NULL, 0, - bpf_prog_type__needs_kver(attr->prog_type), - flags); + return __bpf_object__open(attr->file, NULL, 0, NULL, flags); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -3655,25 +3608,67 @@ struct bpf_object *bpf_object__open(const char *path) return bpf_object__open_xattr(&attr); } -struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name) +struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +{ + const char *obj_name; + bool relaxed_maps; + + if (!OPTS_VALID(opts, bpf_object_open_opts)) + return ERR_PTR(-EINVAL); + if (!path) + return ERR_PTR(-EINVAL); + + pr_debug("loading %s\n", path); + + obj_name = OPTS_GET(opts, object_name, path); + relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + return __bpf_object__open(path, NULL, 0, obj_name, + relaxed_maps ? MAPS_RELAX_COMPAT : 0); +} + +struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts) { char tmp_name[64]; + const char *obj_name; + bool relaxed_maps; - /* param validation */ - if (!obj_buf || obj_buf_sz <= 0) - return NULL; + if (!OPTS_VALID(opts, bpf_object_open_opts)) + return ERR_PTR(-EINVAL); + if (!obj_buf || obj_buf_sz == 0) + return ERR_PTR(-EINVAL); - if (!name) { + obj_name = OPTS_GET(opts, object_name, NULL); + if (!obj_name) { snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", (unsigned long)obj_buf, (unsigned long)obj_buf_sz); - name = tmp_name; + obj_name = tmp_name; } - pr_debug("loading object '%s' from buffer\n", name); + pr_debug("loading object '%s' from buffer\n", obj_name); + + relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + return __bpf_object__open(obj_name, obj_buf, obj_buf_sz, obj_name, + relaxed_maps ? MAPS_RELAX_COMPAT : 0); +} + +struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, + .object_name = name, + /* wrong default, but backwards-compatible */ + .relaxed_maps = true, + ); + + /* returning NULL is wrong, but backwards-compatible */ + if (!obj_buf || obj_buf_sz == 0) + return NULL; - return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); + return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); } int bpf_object__unload(struct bpf_object *obj) @@ -4236,7 +4231,7 @@ bpf_object__next(struct bpf_object *prev) const char *bpf_object__name(const struct bpf_object *obj) { - return obj ? obj->path : ERR_PTR(-EINVAL); + return obj ? obj->name : ERR_PTR(-EINVAL); } unsigned int bpf_object__kversion(const struct bpf_object *obj) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e8f70977d137..667e6853e51f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -67,14 +67,52 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + */ +#define LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME; \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + NAME = (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + } + +struct bpf_object_open_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* object name override, if provided: + * - for object open from file, this will override setting object + * name from file path's base name; + * - for object open from memory buffer, this will specify an object + * name and will override default "<addr>-<buf-size>" name; + */ + const char *object_name; + /* parse map definitions non-strictly, allowing extra attributes/data */ + bool relaxed_maps; +}; +#define bpf_object_open_opts__last_field relaxed_maps + LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +LIBBPF_API struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts); + +/* deprecated bpf_object__open variants */ +LIBBPF_API struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name); +LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, - int flags); -LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name); + int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size); int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d04c7cb623ed..4d241fd92dd4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -190,3 +190,9 @@ LIBBPF_0.0.5 { global: bpf_btf_get_next_id; } LIBBPF_0.0.4; + +LIBBPF_0.0.6 { + global: + bpf_object__open_file; + bpf_object__open_mem; +} LIBBPF_0.0.5; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 98216a69c32f..f4ff8c3dae72 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -63,6 +63,38 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +static inline bool libbpf_validate_opts(const char *opts, + size_t opts_sz, size_t user_sz, + const char *type_name) +{ + if (user_sz < sizeof(size_t)) { + pr_warning("%s size (%zu) is too small\n", type_name, user_sz); + return false; + } + if (user_sz > opts_sz) { + size_t i; + + for (i = opts_sz; i < user_sz; i++) { + if (opts[i]) { + pr_warning("%s has non-zero extra bytes", + type_name); + return false; + } + } + } + return true; +} + +#define OPTS_VALID(opts, type) \ + (!(opts) || libbpf_validate_opts((const char *)opts, \ + offsetofend(struct type, \ + type##__last_field), \ + (opts)->sz, #type)) +#define OPTS_HAS(opts, field) \ + ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) +#define OPTS_GET(opts, field, fallback_value) \ + (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) + int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.c index fc134873bb6d..f0eb2727b766 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.c @@ -7,12 +7,14 @@ int main(int argc, char *argv[]) { - /* libbpf.h */ - libbpf_set_print(NULL); + /* libbpf.h */ + libbpf_set_print(NULL); - /* bpf.h */ - bpf_prog_get_fd_by_id(0); + /* bpf.h */ + bpf_prog_get_fd_by_id(0); - /* btf.h */ - btf__new(NULL, 0); + /* btf.h */ + btf__new(NULL, 0); + + return 0; } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a902838f9fcc..9d5348086203 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -163,6 +163,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, umem->umem_area = umem_area; xsk_set_umem_config(&umem->config, usr_config); + memset(&mr, 0, sizeof(mr)); mr.addr = (uintptr_t)umem_area; mr.len = size; mr.chunk_size = umem->config.frame_size; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6889c19a628c..00d05c5e2d57 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,8 +15,6 @@ endif CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy -LLVM_READELF ?= llvm-readelf -BTF_PAHOLE ?= pahole BPF_GCC ?= $(shell command -v bpf-gcc;) CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ -Dbpf_prog_load=bpf_prog_test_load \ @@ -29,7 +27,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_btf_dump test_cgroup_attach xdping + test_cgroup_attach xdping BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -126,16 +124,6 @@ force: $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -# Let newer LLVM versions transparently probe the kernel for availability -# of full BPF instruction set. -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif - # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. @@ -147,8 +135,9 @@ $(shell $(1) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) -BPF_CFLAGS = -I. -I./include/uapi -I../../../include/uapi \ - -I$(OUTPUT)/../usr/include -D__TARGET_ARCH_$(SRCARCH) +BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ + -I. -I./include/uapi -I../../../include/uapi \ + -I$(BPFDIR) -I$(OUTPUT)/../usr/include CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types @@ -162,28 +151,6 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h $(OUTPUT)/test_progs.o: flow_dissector_load.h -BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) -BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) -BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') -BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ - $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ - /bin/rm -f ./llvm_btf_verify.o) - -ifneq ($(BTF_LLVM_PROBE),) - BPF_CFLAGS += -g -else -ifneq ($(BTF_LLC_PROBE),) -ifneq ($(BTF_PAHOLE_PROBE),) -ifneq ($(BTF_OBJCOPY_PROBE),) - BPF_CFLAGS += -g - LLC_FLAGS += -mattr=dwarfris - DWARF2BTF = y -endif -endif -endif -endif - TEST_PROGS_CFLAGS := -I. -I$(OUTPUT) TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier @@ -212,11 +179,8 @@ $(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \ | $(ALU32_BUILD_DIR) ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -target bpf -emit-llvm \ -c $< -o - || echo "clang failed") | \ - $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ + $(LLC) -march=bpf -mcpu=probe -mattr=+alu32 $(LLC_FLAGS) \ -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ -endif endif ifneq ($(BPF_GCC),) @@ -251,18 +215,13 @@ endif $(OUTPUT)/test_xdp.o: progs/test_xdp.c ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -emit-llvm -c $< -o - || \ echo "clang failed") | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ -endif + $(LLC) -march=bpf -mcpu=probe $(LLC_FLAGS) -filetype=obj -o $@ -$(OUTPUT)/%.o: progs/%.c +# libbpf has to be built before BPF programs due to bpf_helper_defs.h +$(OUTPUT)/%.o: progs/%.c | $(BPFOBJ) ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -target bpf -emit-llvm \ -c $< -o - || echo "clang failed") | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ -endif + $(LLC) -march=bpf -mcpu=probe $(LLC_FLAGS) -filetype=obj -o $@ PROG_TESTS_DIR = $(OUTPUT)/prog_tests $(PROG_TESTS_DIR): @@ -271,7 +230,7 @@ PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h PROG_TESTS_FILES := $(wildcard prog_tests/*.c) test_progs.c: $(PROG_TESTS_H) $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) -$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H) +$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(OUTPUT)/test_attach_probe.o $(PROG_TESTS_H) $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) $(shell ( cd prog_tests/; \ echo '/* Generated header, do not edit */'; \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h deleted file mode 100644 index 54a50699bbfd..000000000000 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ /dev/null @@ -1,535 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_HELPERS__ -#define __BPF_HELPERS__ - -#define __uint(name, val) int (*name)[val] -#define __type(name, val) val *name - -/* helper macro to print out debug messages */ -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - -#ifdef __clang__ - -/* helper macro to place programs, maps, license in - * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader - */ -#define SEC(NAME) __attribute__((section(NAME), used)) - -/* helper functions called from eBPF programs written in C */ -static void *(*bpf_map_lookup_elem)(void *map, const void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_update_elem; -static int (*bpf_map_delete_elem)(void *map, const void *key) = - (void *) BPF_FUNC_map_delete_elem; -static int (*bpf_map_push_elem)(void *map, const void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_push_elem; -static int (*bpf_map_pop_elem)(void *map, void *value) = - (void *) BPF_FUNC_map_pop_elem; -static int (*bpf_map_peek_elem)(void *map, void *value) = - (void *) BPF_FUNC_map_peek_elem; -static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read; -static unsigned long long (*bpf_ktime_get_ns)(void) = - (void *) BPF_FUNC_ktime_get_ns; -static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = - (void *) BPF_FUNC_trace_printk; -static void (*bpf_tail_call)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_tail_call; -static unsigned long long (*bpf_get_smp_processor_id)(void) = - (void *) BPF_FUNC_get_smp_processor_id; -static unsigned long long (*bpf_get_current_pid_tgid)(void) = - (void *) BPF_FUNC_get_current_pid_tgid; -static unsigned long long (*bpf_get_current_uid_gid)(void) = - (void *) BPF_FUNC_get_current_uid_gid; -static int (*bpf_get_current_comm)(void *buf, int buf_size) = - (void *) BPF_FUNC_get_current_comm; -static unsigned long long (*bpf_perf_event_read)(void *map, - unsigned long long flags) = - (void *) BPF_FUNC_perf_event_read; -static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = - (void *) BPF_FUNC_clone_redirect; -static int (*bpf_redirect)(int ifindex, int flags) = - (void *) BPF_FUNC_redirect; -static int (*bpf_redirect_map)(void *map, int key, int flags) = - (void *) BPF_FUNC_redirect_map; -static int (*bpf_perf_event_output)(void *ctx, void *map, - unsigned long long flags, void *data, - int size) = - (void *) BPF_FUNC_perf_event_output; -static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = - (void *) BPF_FUNC_get_stackid; -static int (*bpf_probe_write_user)(void *dst, const void *src, int size) = - (void *) BPF_FUNC_probe_write_user; -static int (*bpf_current_task_under_cgroup)(void *map, int index) = - (void *) BPF_FUNC_current_task_under_cgroup; -static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_get_tunnel_key; -static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_set_tunnel_key; -static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_get_tunnel_opt; -static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_set_tunnel_opt; -static unsigned long long (*bpf_get_prandom_u32)(void) = - (void *) BPF_FUNC_get_prandom_u32; -static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_head; -static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_meta; -static int (*bpf_get_socket_cookie)(void *ctx) = - (void *) BPF_FUNC_get_socket_cookie; -static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, - int optlen) = - (void *) BPF_FUNC_setsockopt; -static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, - int optlen) = - (void *) BPF_FUNC_getsockopt; -static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = - (void *) BPF_FUNC_sock_ops_cb_flags_set; -static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = - (void *) BPF_FUNC_sk_redirect_map; -static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) = - (void *) BPF_FUNC_sk_redirect_hash; -static int (*bpf_sock_map_update)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_sock_map_update; -static int (*bpf_sock_hash_update)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_sock_hash_update; -static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, - void *buf, unsigned int buf_size) = - (void *) BPF_FUNC_perf_event_read_value; -static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, - unsigned int buf_size) = - (void *) BPF_FUNC_perf_prog_read_value; -static int (*bpf_override_return)(void *ctx, unsigned long rc) = - (void *) BPF_FUNC_override_return; -static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = - (void *) BPF_FUNC_msg_redirect_map; -static int (*bpf_msg_redirect_hash)(void *ctx, - void *map, void *key, int flags) = - (void *) BPF_FUNC_msg_redirect_hash; -static int (*bpf_msg_apply_bytes)(void *ctx, int len) = - (void *) BPF_FUNC_msg_apply_bytes; -static int (*bpf_msg_cork_bytes)(void *ctx, int len) = - (void *) BPF_FUNC_msg_cork_bytes; -static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = - (void *) BPF_FUNC_msg_pull_data; -static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = - (void *) BPF_FUNC_msg_push_data; -static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = - (void *) BPF_FUNC_msg_pop_data; -static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = - (void *) BPF_FUNC_bind; -static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_tail; -static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, - int size, int flags) = - (void *) BPF_FUNC_skb_get_xfrm_state; -static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) = - (void *) BPF_FUNC_sk_select_reuseport; -static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = - (void *) BPF_FUNC_get_stack; -static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, - int plen, __u32 flags) = - (void *) BPF_FUNC_fib_lookup; -static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr, - unsigned int len) = - (void *) BPF_FUNC_lwt_push_encap; -static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset, - void *from, unsigned int len) = - (void *) BPF_FUNC_lwt_seg6_store_bytes; -static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param, - unsigned int param_len) = - (void *) BPF_FUNC_lwt_seg6_action; -static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset, - unsigned int len) = - (void *) BPF_FUNC_lwt_seg6_adjust_srh; -static int (*bpf_rc_repeat)(void *ctx) = - (void *) BPF_FUNC_rc_repeat; -static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, - unsigned long long scancode, unsigned int toggle) = - (void *) BPF_FUNC_rc_keydown; -static unsigned long long (*bpf_get_current_cgroup_id)(void) = - (void *) BPF_FUNC_get_current_cgroup_id; -static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) = - (void *) BPF_FUNC_get_local_storage; -static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = - (void *) BPF_FUNC_skb_cgroup_id; -static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = - (void *) BPF_FUNC_skb_ancestor_cgroup_id; -static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_sk_lookup_tcp; -static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_skc_lookup_tcp; -static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_sk_lookup_udp; -static int (*bpf_sk_release)(struct bpf_sock *sk) = - (void *) BPF_FUNC_sk_release; -static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = - (void *) BPF_FUNC_skb_vlan_push; -static int (*bpf_skb_vlan_pop)(void *ctx) = - (void *) BPF_FUNC_skb_vlan_pop; -static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = - (void *) BPF_FUNC_rc_pointer_rel; -static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = - (void *) BPF_FUNC_spin_lock; -static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = - (void *) BPF_FUNC_spin_unlock; -static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_sk_fullsock; -static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_tcp_sock; -static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_get_listener_sock; -static int (*bpf_skb_ecn_set_ce)(void *ctx) = - (void *) BPF_FUNC_skb_ecn_set_ce; -static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, - void *ip, int ip_len, void *tcp, int tcp_len) = - (void *) BPF_FUNC_tcp_check_syncookie; -static int (*bpf_sysctl_get_name)(void *ctx, char *buf, - unsigned long long buf_len, - unsigned long long flags) = - (void *) BPF_FUNC_sysctl_get_name; -static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_get_current_value; -static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_get_new_value; -static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_set_new_value; -static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, - unsigned long long flags, long *res) = - (void *) BPF_FUNC_strtol; -static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, - unsigned long long flags, unsigned long *res) = - (void *) BPF_FUNC_strtoul; -static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, - void *value, __u64 flags) = - (void *) BPF_FUNC_sk_storage_get; -static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = - (void *)BPF_FUNC_sk_storage_delete; -static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal; -static long long (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *ip, - int ip_len, void *tcp, int tcp_len) = - (void *) BPF_FUNC_tcp_gen_syncookie; - -/* llvm builtin functions that eBPF C program may use to - * emit BPF_LD_ABS and BPF_LD_IND instructions - */ -struct sk_buff; -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); - -/* a helper structure used by eBPF C program - * to describe map attributes to elf_bpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -#else - -#include <bpf-helpers.h> - -#endif - -#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ - struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ - }; \ - struct ____btf_map_##name \ - __attribute__ ((section(".maps." #name), used)) \ - ____btf_map_##name = { } - -static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = - (void *) BPF_FUNC_skb_load_bytes; -static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) = - (void *) BPF_FUNC_skb_load_bytes_relative; -static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = - (void *) BPF_FUNC_skb_store_bytes; -static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l3_csum_replace; -static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = - (void *) BPF_FUNC_csum_diff; -static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_under_cgroup; -static int (*bpf_skb_change_head)(void *, int len, int flags) = - (void *) BPF_FUNC_skb_change_head; -static int (*bpf_skb_pull_data)(void *, int len) = - (void *) BPF_FUNC_skb_pull_data; -static unsigned int (*bpf_get_cgroup_classid)(void *ctx) = - (void *) BPF_FUNC_get_cgroup_classid; -static unsigned int (*bpf_get_route_realm)(void *ctx) = - (void *) BPF_FUNC_get_route_realm; -static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) = - (void *) BPF_FUNC_skb_change_proto; -static int (*bpf_skb_change_type)(void *ctx, __u32 type) = - (void *) BPF_FUNC_skb_change_type; -static unsigned int (*bpf_get_hash_recalc)(void *ctx) = - (void *) BPF_FUNC_get_hash_recalc; -static unsigned long long (*bpf_get_current_task)(void) = - (void *) BPF_FUNC_get_current_task; -static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) = - (void *) BPF_FUNC_skb_change_tail; -static long long (*bpf_csum_update)(void *ctx, __u32 csum) = - (void *) BPF_FUNC_csum_update; -static void (*bpf_set_hash_invalid)(void *ctx) = - (void *) BPF_FUNC_set_hash_invalid; -static int (*bpf_get_numa_node_id)(void) = - (void *) BPF_FUNC_get_numa_node_id; -static int (*bpf_probe_read_str)(void *ctx, __u32 size, - const void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read_str; -static unsigned int (*bpf_get_socket_uid)(void *ctx) = - (void *) BPF_FUNC_get_socket_uid; -static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) = - (void *) BPF_FUNC_set_hash; -static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, - unsigned long long flags) = - (void *) BPF_FUNC_skb_adjust_room; - -/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ -#if defined(__TARGET_ARCH_x86) - #define bpf_target_x86 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_s390) - #define bpf_target_s390 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm) - #define bpf_target_arm - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm64) - #define bpf_target_arm64 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_mips) - #define bpf_target_mips - #define bpf_target_defined -#elif defined(__TARGET_ARCH_powerpc) - #define bpf_target_powerpc - #define bpf_target_defined -#elif defined(__TARGET_ARCH_sparc) - #define bpf_target_sparc - #define bpf_target_defined -#else - #undef bpf_target_defined -#endif - -/* Fall back to what the compiler says */ -#ifndef bpf_target_defined -#if defined(__x86_64__) - #define bpf_target_x86 -#elif defined(__s390__) - #define bpf_target_s390 -#elif defined(__arm__) - #define bpf_target_arm -#elif defined(__aarch64__) - #define bpf_target_arm64 -#elif defined(__mips__) - #define bpf_target_mips -#elif defined(__powerpc__) - #define bpf_target_powerpc -#elif defined(__sparc__) - #define bpf_target_sparc -#endif -#endif - -#if defined(bpf_target_x86) - -#ifdef __KERNEL__ -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) -#else -#ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) ((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) -#endif -#endif - -#elif defined(bpf_target_s390) - -/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#elif defined(bpf_target_arm) - -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) -#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#elif defined(bpf_target_arm64) - -/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#elif defined(bpf_target_mips) - -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[1]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#elif defined(bpf_target_powerpc) - -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#elif defined(bpf_target_sparc) - -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -/* Should this also be a bpf_target check for the sparc case? */ -#if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#else -#define PT_REGS_IP(x) ((x)->pc) -#endif - -#endif - -#if defined(bpf_target_powerpc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_sparc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) -#endif - -/* - * BPF_CORE_READ abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() - * built-in, provided by Clang. - * - * __builtin_preserve_access_index() takes as an argument an expression of - * taking an address of a field within struct/union. It makes compiler emit - * a relocation, which records BTF type ID describing root struct/union and an - * accessor string which describes exact embedded field that was used to take - * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h. - * - * This relocation allows libbpf to adjust BPF instruction to use correct - * actual field offset, based on target kernel BTF type that matches original - * (local) BTF, used to record relocation. - */ -#define BPF_CORE_READ(dst, src) \ - bpf_probe_read((dst), sizeof(*(src)), \ - __builtin_preserve_access_index(src)) - -#endif diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h new file mode 100644 index 000000000000..6f8988738bc1 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_LEGACY__ +#define __BPF_LEGACY__ + +/* + * legacy bpf_map_def with extra fields supported only by bpf_load(), do not + * use outside of samples/bpf + */ +struct bpf_map_def_legacy { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; + unsigned int numa_node; +}; + +#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ + struct ____btf_map_##name { \ + type_key key; \ + type_val value; \ + }; \ + struct ____btf_map_##name \ + __attribute__ ((section(".maps." #name), used)) \ + ____btf_map_##name = { } + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +#endif + diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e95c33e333a4..0fb910df5387 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -41,7 +41,7 @@ * * If successful, 0 is returned. */ -int enable_all_controllers(char *cgroup_path) +static int enable_all_controllers(char *cgroup_path) { char path[PATH_MAX + 1]; char buf[PATH_MAX]; @@ -98,7 +98,7 @@ int enable_all_controllers(char *cgroup_path) */ int setup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; + char cgroup_workdir[PATH_MAX - 24]; format_cgroup_path(cgroup_workdir, ""); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 5ecc267d98b0..4f50d32c4abb 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,6 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#define EMBED_FILE(NAME, PATH) \ +asm ( \ +" .pushsection \".rodata\", \"a\", @progbits \n" \ +" .global "#NAME"_data \n" \ +#NAME"_data: \n" \ +" .incbin \"" PATH "\" \n" \ +#NAME"_data_end: \n" \ +" .global "#NAME"_size \n" \ +" .type "#NAME"_size, @object \n" \ +" .size "#NAME"_size, 4 \n" \ +" .align 4, \n" \ +#NAME"_size: \n" \ +" .int "#NAME"_data_end - "#NAME"_data \n" \ +" .popsection \n" \ +); \ +extern char NAME##_data[]; \ +extern int NAME##_size; + ssize_t get_base_addr() { size_t start; char buf[256]; @@ -21,6 +39,8 @@ ssize_t get_base_addr() { return -EINVAL; } +EMBED_FILE(probe, "test_attach_probe.o"); + void test_attach_probe(void) { const char *kprobe_name = "kprobe/sys_nanosleep"; @@ -29,11 +49,15 @@ void test_attach_probe(void) const char *uretprobe_name = "uretprobe/trigger_func"; const int kprobe_idx = 0, kretprobe_idx = 1; const int uprobe_idx = 2, uretprobe_idx = 3; - const char *file = "./test_attach_probe.o"; + const char *obj_name = "attach_probe"; + LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .object_name = obj_name, + .relaxed_maps = true, + ); struct bpf_program *kprobe_prog, *kretprobe_prog; struct bpf_program *uprobe_prog, *uretprobe_prog; struct bpf_object *obj; - int err, prog_fd, duration = 0, res; + int err, duration = 0, res; struct bpf_link *kprobe_link = NULL; struct bpf_link *kretprobe_link = NULL; struct bpf_link *uprobe_link = NULL; @@ -48,11 +72,16 @@ void test_attach_probe(void) return; uprobe_offset = (size_t)&get_base_addr - base_addr; - /* load programs */ - err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + /* open object */ + obj = bpf_object__open_mem(probe_data, probe_size, &open_opts); + if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj))) return; + if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", + "wrong obj name '%s', expected '%s'\n", + bpf_object__name(obj), obj_name)) + goto cleanup; + kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); if (CHECK(!kprobe_prog, "find_probe", "prog '%s' not found\n", kprobe_name)) @@ -70,6 +99,16 @@ void test_attach_probe(void) "prog '%s' not found\n", uretprobe_name)) goto cleanup; + bpf_program__set_kprobe(kprobe_prog); + bpf_program__set_kprobe(kretprobe_prog); + bpf_program__set_kprobe(uprobe_prog); + bpf_program__set_kprobe(uretprobe_prog); + + /* create maps && load programs */ + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + /* load maps */ results_map_fd = bpf_find_map(__func__, obj, "results_map"); if (CHECK(results_map_fd < 0, "find_results_map", diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 6e75dd3cb14f..7390d3061065 100644 --- a/tools/testing/selftests/bpf/test_btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -1,36 +1,26 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <linux/err.h> -#include <btf.h> - -#define CHECK(condition, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; void btf_dump_printf(void *ctx, const char *fmt, va_list args) { vfprintf(ctx, fmt, args); } -struct btf_dump_test_case { +static struct btf_dump_test_case { const char *name; + const char *file; struct btf_dump_opts opts; } btf_dump_test_cases[] = { - {.name = "btf_dump_test_case_syntax", .opts = {}}, - {.name = "btf_dump_test_case_ordering", .opts = {}}, - {.name = "btf_dump_test_case_padding", .opts = {}}, - {.name = "btf_dump_test_case_packing", .opts = {}}, - {.name = "btf_dump_test_case_bitfields", .opts = {}}, - {.name = "btf_dump_test_case_multidim", .opts = {}}, - {.name = "btf_dump_test_case_namespacing", .opts = {}}, + {"btf_dump: syntax", "btf_dump_test_case_syntax", {}}, + {"btf_dump: ordering", "btf_dump_test_case_ordering", {}}, + {"btf_dump: padding", "btf_dump_test_case_padding", {}}, + {"btf_dump: packing", "btf_dump_test_case_packing", {}}, + {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}}, + {"btf_dump: multidim", "btf_dump_test_case_multidim", {}}, + {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}}, }; static int btf_dump_all_types(const struct btf *btf, @@ -55,55 +45,51 @@ done: return err; } -int test_btf_dump_case(int n, struct btf_dump_test_case *test_case) +static int test_btf_dump_case(int n, struct btf_dump_test_case *t) { char test_file[256], out_file[256], diff_cmd[1024]; struct btf *btf = NULL; int err = 0, fd = -1; FILE *f = NULL; - fprintf(stderr, "Test case #%d (%s): ", n, test_case->name); - - snprintf(test_file, sizeof(test_file), "%s.o", test_case->name); + snprintf(test_file, sizeof(test_file), "%s.o", t->file); btf = btf__parse_elf(test_file, NULL); - if (CHECK(IS_ERR(btf), + if (CHECK(IS_ERR(btf), "btf_parse_elf", "failed to load test BTF: %ld\n", PTR_ERR(btf))) { err = -PTR_ERR(btf); btf = NULL; goto done; } - snprintf(out_file, sizeof(out_file), - "/tmp/%s.output.XXXXXX", test_case->name); + snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file); fd = mkstemp(out_file); - if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) { + if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) { err = fd; goto done; } f = fdopen(fd, "w"); - if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n", + if (CHECK(f == NULL, "open_tmp", "failed to open file: %s(%d)\n", strerror(errno), errno)) { close(fd); goto done; } - test_case->opts.ctx = f; - err = btf_dump_all_types(btf, &test_case->opts); + t->opts.ctx = f; + err = btf_dump_all_types(btf, &t->opts); fclose(f); close(fd); - if (CHECK(err, "failure during C dumping: %d\n", err)) { + if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) { goto done; } - snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name); + snprintf(test_file, sizeof(test_file), "progs/%s.c", t->file); if (access(test_file, R_OK) == -1) /* * When the test is run with O=, kselftest copies TEST_FILES * without preserving the directory structure. */ - snprintf(test_file, sizeof(test_file), "%s.c", - test_case->name); + snprintf(test_file, sizeof(test_file), "%s.c", t->file); /* * Diff test output and expected test output, contained between * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case. @@ -118,33 +104,27 @@ int test_btf_dump_case(int n, struct btf_dump_test_case *test_case) "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'", test_file, out_file); err = system(diff_cmd); - if (CHECK(err, + if (CHECK(err, "diff", "differing test output, output=%s, err=%d, diff cmd:\n%s\n", out_file, err, diff_cmd)) goto done; remove(out_file); - fprintf(stderr, "OK\n"); done: btf__free(btf); return err; } -int main() { - int test_case_cnt, i, err, failed = 0; - - test_case_cnt = sizeof(btf_dump_test_cases) / - sizeof(btf_dump_test_cases[0]); +void test_btf_dump() { + int i; - for (i = 0; i < test_case_cnt; i++) { - err = test_btf_dump_case(i, &btf_dump_test_cases[i]); - if (err) - failed++; - } + for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) { + struct btf_dump_test_case *t = &btf_dump_test_cases[i]; - fprintf(stderr, "%d tests succeeded, %d tests failed.\n", - test_case_cnt - failed, failed); + if (!test__start_subtest(t->name)) + continue; - return failed; + test_btf_dump_case(i, &btf_dump_test_cases[i]); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index f3863f976a48..21a0dff66241 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -193,8 +193,12 @@ static struct core_reloc_test_case test_cases[] = { .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */ .input = "", .input_len = 0, - .output = "\1", /* true */ - .output_len = 1, + .output = STRUCT_TO_CHAR_PTR(core_reloc_kernel_output) { + .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + .comm = "test_progs\0\0\0\0\0", + .comm_len = 11, + }, + .output_len = sizeof(struct core_reloc_kernel_output), }, /* validate BPF program can use multiple flavors to match against diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c new file mode 100644 index 000000000000..777faffc4639 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that the flow_dissector program can be updated with a single + * syscall by attaching a new program that replaces the existing one. + * + * Corner case - the same program cannot be attached twice. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <unistd.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> + +#include "test_progs.h" + +static bool is_attached(int netns) +{ + __u32 cnt; + int err; + + err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt); + if (CHECK_FAIL(err)) { + perror("bpf_prog_query"); + return true; /* fail-safe */ + } + + return cnt > 0; +} + +static int load_prog(void) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, BPF_OK), + BPF_EXIT_INSN(), + }; + int fd; + + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + if (CHECK_FAIL(fd < 0)) + perror("bpf_load_program"); + + return fd; +} + +static void do_flow_dissector_reattach(void) +{ + int prog_fd[2] = { -1, -1 }; + int err; + + prog_fd[0] = load_prog(); + if (prog_fd[0] < 0) + return; + + prog_fd[1] = load_prog(); + if (prog_fd[1] < 0) + goto out_close; + + err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach-0"); + goto out_close; + } + + /* Expect success when attaching a different program */ + err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach-1"); + goto out_detach; + } + + /* Expect failure when attaching the same program twice */ + err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(!err || errno != EINVAL)) + perror("bpf_prog_attach-2"); + +out_detach: + err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (CHECK_FAIL(err)) + perror("bpf_prog_detach"); + +out_close: + close(prog_fd[1]); + close(prog_fd[0]); +} + +void test_flow_dissector_reattach(void) +{ + int init_net, err; + + init_net = open("/proc/1/ns/net", O_RDONLY); + if (CHECK_FAIL(init_net < 0)) { + perror("open(/proc/1/ns/net)"); + return; + } + + err = setns(init_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("setns(/proc/1/ns/net)"); + goto out_close; + } + + if (is_attached(init_net)) { + test__skip(); + printf("Can't test with flow dissector attached to init_net\n"); + return; + } + + /* First run tests in root network namespace */ + do_flow_dissector_reattach(); + + /* Then repeat tests in a non-root namespace */ + err = unshare(CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("unshare(CLONE_NEWNET)"); + goto out_close; + } + do_flow_dissector_reattach(); + +out_close: + close(init_net); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c new file mode 100644 index 000000000000..9bf9de0aaeea --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +struct bss { + unsigned did_run; + unsigned iters; + unsigned sum; +}; + +struct rdonly_map_subtest { + const char *subtest_name; + const char *prog_name; + unsigned exp_iters; + unsigned exp_sum; +}; + +void test_rdonly_maps(void) +{ + const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop"; + const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop"; + const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop"; + const char *file = "test_rdonly_maps.o"; + struct rdonly_map_subtest subtests[] = { + { "skip loop", prog_name_skip_loop, 0, 0 }, + { "part loop", prog_name_part_loop, 3, 2 + 3 + 4 }, + { "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 }, + }; + int i, err, zero = 0, duration = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + struct bss bss; + + obj = bpf_object__open_file(file, NULL); + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return; + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_raw_tracepoint(prog); + } + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss"); + if (CHECK(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(subtests); i++) { + const struct rdonly_map_subtest *t = &subtests[i]; + + if (!test__start_subtest(t->subtest_name)) + continue; + + prog = bpf_object__find_program_by_title(obj, t->prog_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", + t->prog_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + err = bpf_map_update_elem(bpf_map__fd(bss_map), &zero, &bss, 0); + if (CHECK(err, "set_bss", "failed to set bss data: %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n", + t->prog_name, PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger probe */ + usleep(1); + + bpf_link__destroy(link); + link = NULL; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, &bss); + if (CHECK(err, "get_bss", "failed to get bss data: %d\n", err)) + goto cleanup; + if (CHECK(bss.did_run == 0, "check_run", + "prog '%s' didn't run?\n", t->prog_name)) + goto cleanup; + if (CHECK(bss.iters != t->exp_iters, "check_iters", + "prog '%s' iters: %d, expected: %d\n", + t->prog_name, bss.iters, t->exp_iters)) + goto cleanup; + if (CHECK(bss.sum != t->exp_sum, "check_sum", + "prog '%s' sum: %d, expected: %d\n", + t->prog_name, bss.sum, t->exp_sum)) + goto cleanup; + } + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 5c78e2b5a917..86cee820d4d3 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -3,16 +3,26 @@ void test_reference_tracking(void) { - const char *file = "./test_sk_lookup_kern.o"; + const char *file = "test_sk_lookup_kern.o"; + const char *obj_name = "ref_track"; + LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .object_name = obj_name, + .relaxed_maps = true, + ); struct bpf_object *obj; struct bpf_program *prog; __u32 duration = 0; int err = 0; - obj = bpf_object__open(file); + obj = bpf_object__open_file(file, &open_opts); if (CHECK_FAIL(IS_ERR(obj))) return; + if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", + "wrong obj name '%s', expected '%s'\n", + bpf_object__name(obj), obj_name)) + goto cleanup; + bpf_object__for_each_program(prog, obj) { const char *title; @@ -35,5 +45,7 @@ void test_reference_tracking(void) } CHECK(err, title, "\n"); } + +cleanup: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 3a62119c7498..35c512818a56 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -62,6 +62,10 @@ struct padded_a_lot { * long: 64; * long: 64; * int b; + * long: 32; + * long: 64; + * long: 64; + * long: 64; *}; * */ @@ -95,7 +99,6 @@ struct zone_padding { struct zone { int a; short b; - short: 16; struct zone_padding __pad__; }; diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index f686a8138d90..9a6bdeb4894c 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1,5 +1,14 @@ #include <stdint.h> #include <stdbool.h> +/* + * KERNEL + */ + +struct core_reloc_kernel_output { + int valid[10]; + char comm[16]; + int comm_len; +}; /* * FLAVORS diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index 7cdb7f878310..40ac722a9da5 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -7,6 +7,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 9b2f808a2863..bb80f29aa7f7 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -7,6 +7,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index d727657d51e2..2b9165a7afe1 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -7,6 +7,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_tracing.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 9a3d1c79e6fe..1bafbb944e37 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -14,13 +14,12 @@ struct sockopt_sk { __u8 val; }; -struct bpf_map_def SEC("maps") socket_storage_map = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .key_size = sizeof(int), - .value_size = sizeof(struct sockopt_sk), - .map_flags = BPF_F_NO_PREALLOC, -}; -BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk); +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sockopt_sk); +} socket_storage_map SEC(".maps"); SEC("cgroup/getsockopt") int _getsockopt(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 233bdcb1659e..2cf813a06b83 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -13,13 +13,12 @@ struct tcp_rtt_storage { __u32 icsk_retransmits; }; -struct bpf_map_def SEC("maps") socket_storage_map = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .key_size = sizeof(int), - .value_size = sizeof(struct tcp_rtt_storage), - .map_flags = BPF_F_NO_PREALLOC, -}; -BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage); +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tcp_rtt_storage); +} socket_storage_map SEC(".maps"); SEC("sockops") int _sockops(struct bpf_sock_ops *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 63a8dfef893b..534621e38906 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -49,4 +49,3 @@ int handle_uprobe_return(struct pt_regs *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index e5c79fe0ffdb..763c51447c19 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -2,6 +2,7 @@ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 5ee3622ddebb..96f9e8451029 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -2,6 +2,7 @@ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> #include "bpf_helpers.h" +#include "bpf_legacy.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c index bf67f0fdf743..96b1f5f3b07a 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -31,6 +32,8 @@ struct core_reloc_arrays { struct core_reloc_arrays_substruct d[1][2]; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_arrays(void *ctx) { @@ -38,16 +41,16 @@ int test_core_arrays(void *ctx) struct core_reloc_arrays_output *out = (void *)&data.out; /* in->a[2] */ - if (BPF_CORE_READ(&out->a2, &in->a[2])) + if (CORE_READ(&out->a2, &in->a[2])) return 1; /* in->b[1][2][3] */ - if (BPF_CORE_READ(&out->b123, &in->b[1][2][3])) + if (CORE_READ(&out->b123, &in->b[1][2][3])) return 1; /* in->c[1].c */ - if (BPF_CORE_READ(&out->c1c, &in->c[1].c)) + if (CORE_READ(&out->c1c, &in->c[1].c)) return 1; /* in->d[0][0].d */ - if (BPF_CORE_READ(&out->d00d, &in->d[0][0].d)) + if (CORE_READ(&out->d00d, &in->d[0][0].d)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c index 9fda73e87972..71fd7cebc9d7 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -39,6 +40,8 @@ struct core_reloc_flavors___weird { }; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_flavors(void *ctx) { @@ -48,13 +51,13 @@ int test_core_flavors(void *ctx) struct core_reloc_flavors *out = (void *)&data.out; /* read a using weird layout */ - if (BPF_CORE_READ(&out->a, &in_weird->a)) + if (CORE_READ(&out->a, &in_weird->a)) return 1; /* read b using reversed layout */ - if (BPF_CORE_READ(&out->b, &in_rev->b)) + if (CORE_READ(&out->b, &in_rev->b)) return 1; /* read c using original layout */ - if (BPF_CORE_READ(&out->c, &in_orig->c)) + if (CORE_READ(&out->c, &in_orig->c)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c index d99233c8008a..ad5c3f59c9c6 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -23,20 +24,22 @@ struct core_reloc_ints { int64_t s64_field; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_ints(void *ctx) { struct core_reloc_ints *in = (void *)&data.in; struct core_reloc_ints *out = (void *)&data.out; - if (BPF_CORE_READ(&out->u8_field, &in->u8_field) || - BPF_CORE_READ(&out->s8_field, &in->s8_field) || - BPF_CORE_READ(&out->u16_field, &in->u16_field) || - BPF_CORE_READ(&out->s16_field, &in->s16_field) || - BPF_CORE_READ(&out->u32_field, &in->u32_field) || - BPF_CORE_READ(&out->s32_field, &in->s32_field) || - BPF_CORE_READ(&out->u64_field, &in->u64_field) || - BPF_CORE_READ(&out->s64_field, &in->s64_field)) + if (CORE_READ(&out->u8_field, &in->u8_field) || + CORE_READ(&out->s8_field, &in->s8_field) || + CORE_READ(&out->u16_field, &in->u16_field) || + CORE_READ(&out->s16_field, &in->s16_field) || + CORE_READ(&out->u32_field, &in->u32_field) || + CORE_READ(&out->s32_field, &in->s32_field) || + CORE_READ(&out->u64_field, &in->u64_field) || + CORE_READ(&out->s64_field, &in->s64_field)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 37e02aa3f0c8..50f609618b65 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -12,24 +13,78 @@ static volatile struct data { char out[256]; } data; +struct core_reloc_kernel_output { + int valid[10]; + char comm[16]; + int comm_len; +}; + struct task_struct { int pid; int tgid; + char comm[16]; + struct task_struct *group_leader; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_kernel(void *ctx) { struct task_struct *task = (void *)bpf_get_current_task(); + struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); + uint32_t real_tgid = (uint32_t)pid_tgid; int pid, tgid; - if (BPF_CORE_READ(&pid, &task->pid) || - BPF_CORE_READ(&tgid, &task->tgid)) + if (CORE_READ(&pid, &task->pid) || + CORE_READ(&tgid, &task->tgid)) return 1; /* validate pid + tgid matches */ - data.out[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid; + out->valid[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid; + + /* test variadic BPF_CORE_READ macros */ + out->valid[1] = BPF_CORE_READ(task, + tgid) == real_tgid; + out->valid[2] = BPF_CORE_READ(task, + group_leader, + tgid) == real_tgid; + out->valid[3] = BPF_CORE_READ(task, + group_leader, group_leader, + tgid) == real_tgid; + out->valid[4] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + tgid) == real_tgid; + out->valid[5] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, + tgid) == real_tgid; + out->valid[6] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, + tgid) == real_tgid; + out->valid[7] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + tgid) == real_tgid; + out->valid[8] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + group_leader, + tgid) == real_tgid; + out->valid[9] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + group_leader, group_leader, + tgid) == real_tgid; + + /* test BPF_CORE_READ_STR_INTO() returns correct code and contents */ + out->comm_len = BPF_CORE_READ_STR_INTO( + &out->comm, task, + group_leader, group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, group_leader, + comm); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c index c59984bd3e23..1a36b0856653 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -32,6 +33,8 @@ struct core_reloc_misc_extensible { int b; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_misc(void *ctx) { @@ -41,15 +44,15 @@ int test_core_misc(void *ctx) struct core_reloc_misc_output *out = (void *)&data.out; /* record two different relocations with the same accessor string */ - if (BPF_CORE_READ(&out->a, &in_a->a1) || /* accessor: 0:0 */ - BPF_CORE_READ(&out->b, &in_b->b1)) /* accessor: 0:0 */ + if (CORE_READ(&out->a, &in_a->a1) || /* accessor: 0:0 */ + CORE_READ(&out->b, &in_b->b1)) /* accessor: 0:0 */ return 1; /* Validate relocations capture array-only accesses for structs with * fixed header, but with potentially extendable tail. This will read * first 4 bytes of 2nd element of in_ext array of potentially * variably sized struct core_reloc_misc_extensible. */ - if (BPF_CORE_READ(&out->c, &in_ext[2])) /* accessor: 2 */ + if (CORE_READ(&out->c, &in_ext[2])) /* accessor: 2 */ return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c index f98b942c062b..3199fafede2c 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -41,20 +42,22 @@ struct core_reloc_mods { core_reloc_mods_substruct_t h; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_mods(void *ctx) { struct core_reloc_mods *in = (void *)&data.in; struct core_reloc_mods_output *out = (void *)&data.out; - if (BPF_CORE_READ(&out->a, &in->a) || - BPF_CORE_READ(&out->b, &in->b) || - BPF_CORE_READ(&out->c, &in->c) || - BPF_CORE_READ(&out->d, &in->d) || - BPF_CORE_READ(&out->e, &in->e[2]) || - BPF_CORE_READ(&out->f, &in->f[1]) || - BPF_CORE_READ(&out->g, &in->g.x) || - BPF_CORE_READ(&out->h, &in->h.y)) + if (CORE_READ(&out->a, &in->a) || + CORE_READ(&out->b, &in->b) || + CORE_READ(&out->c, &in->c) || + CORE_READ(&out->d, &in->d) || + CORE_READ(&out->e, &in->e[2]) || + CORE_READ(&out->f, &in->f[1]) || + CORE_READ(&out->g, &in->g.x) || + CORE_READ(&out->h, &in->h.y)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c index 3ca30cec2b39..98238cb64fbd 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -30,15 +31,17 @@ struct core_reloc_nesting { } b; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_nesting(void *ctx) { struct core_reloc_nesting *in = (void *)&data.in; struct core_reloc_nesting *out = (void *)&data.out; - if (BPF_CORE_READ(&out->a.a.a, &in->a.a.a)) + if (CORE_READ(&out->a.a.a, &in->a.a.a)) return 1; - if (BPF_CORE_READ(&out->b.b.b, &in->b.b.b)) + if (CORE_READ(&out->b.b.b, &in->b.b.b)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c index add52f23ab35..4f3ecb9127bb 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -25,17 +26,19 @@ struct core_reloc_primitives { int (*f)(const char *); }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_primitives(void *ctx) { struct core_reloc_primitives *in = (void *)&data.in; struct core_reloc_primitives *out = (void *)&data.out; - if (BPF_CORE_READ(&out->a, &in->a) || - BPF_CORE_READ(&out->b, &in->b) || - BPF_CORE_READ(&out->c, &in->c) || - BPF_CORE_READ(&out->d, &in->d) || - BPF_CORE_READ(&out->f, &in->f)) + if (CORE_READ(&out->a, &in->a) || + CORE_READ(&out->b, &in->b) || + CORE_READ(&out->c, &in->c) || + CORE_READ(&out->d, &in->d) || + CORE_READ(&out->f, &in->f)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c index 526b7ddc7ea1..27f602f00419 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <stdint.h> #include "bpf_helpers.h" +#include "bpf_core_read.h" char _license[] SEC("license") = "GPL"; @@ -16,13 +17,15 @@ struct core_reloc_ptr_as_arr { int a; }; +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + SEC("raw_tracepoint/sys_enter") int test_core_ptr_as_arr(void *ctx) { struct core_reloc_ptr_as_arr *in = (void *)&data.in; struct core_reloc_ptr_as_arr *out = (void *)&data.out; - if (BPF_CORE_READ(&out->a, &in[2].a)) + if (CORE_READ(&out->a, &in[2].a)) return 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index f8ffa3f3d44b..6a4a8f57f174 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -47,12 +47,11 @@ struct { * issue and avoid complicated C programming massaging. * This is an acceptable workaround since there is one entry here. */ -typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP]; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); __type(key, __u32); - __type(value, raw_stack_trace_t); + __type(value, __u64[2 * MAX_STACK_RAWTP]); } rawdata_map SEC(".maps"); SEC("raw_tracepoint/sys_enter") @@ -100,4 +99,3 @@ int bpf_prog1(void *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index 876c27deb65a..07c09ca5546a 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -22,4 +22,3 @@ int handle_sys_nanosleep_entry(struct pt_regs *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c new file mode 100644 index 000000000000..52d94e8b214d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +static volatile const struct { + unsigned a[4]; + /* + * if the struct's size is multiple of 16, compiler will put it into + * .rodata.cst16 section, which is not recognized by libbpf; work + * around this by ensuring we don't have 16-aligned struct + */ + char _y; +} rdonly_values = { .a = {2, 3, 4, 5} }; + +static volatile struct { + unsigned did_run; + unsigned iters; + unsigned sum; +} res; + +SEC("raw_tracepoint/sys_enter:skip_loop") +int skip_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * volatile p = (void *)&rdonly_values.a; + unsigned iters = 0, sum = 0; + + /* we should never enter this loop */ + while (*p & 1) { + iters++; + sum += *p; + p++; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +SEC("raw_tracepoint/sys_enter:part_loop") +int part_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * volatile p = (void *)&rdonly_values.a; + unsigned iters = 0, sum = 0; + + /* validate verifier can derive loop termination */ + while (*p < 5) { + iters++; + sum += *p; + p++; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +SEC("raw_tracepoint/sys_enter:full_loop") +int full_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * volatile p = (void *)&rdonly_values.a; + int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]); + unsigned iters = 0, sum = 0; + + /* validate verifier can allow full loop as well */ + while (i > 0 ) { + iters++; + sum += *p; + p++; + i--; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index fa0be3e10a10..3b7e1dca8829 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -74,4 +74,3 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index e2d06191bd35..a8485ae103d1 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -18,19 +18,55 @@ fi # this is the case and run it with in_netns.sh if it is being run in the root # namespace. if [[ -z $(ip netns identify $$) ]]; then + err=0 + if bpftool="$(which bpftool)"; then + echo "Testing global flow dissector..." + + $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + type flow_dissector + + if ! unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected unsuccessful attach in namespace" >&2 + err=1 + fi + + $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + flow_dissector + + if unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected successful attach in namespace" >&2 + err=1 + fi + + if ! $bpftool prog detach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Failed to detach flow dissector" >&2 + err=1 + fi + + rm -rf /sys/fs/bpf/flow + else + echo "Skipping root flow dissector test, bpftool not found" >&2 + fi + + # Run the rest of the tests in a net namespace. ../net/in_netns.sh "$0" "$@" - exit $? -fi + err=$(( $err + $? )) -# Determine selftest success via shell exit code -exit_handler() -{ - if (( $? == 0 )); then + if (( $err == 0 )); then echo "selftests: $TESTNAME [PASS]"; else echo "selftests: $TESTNAME [FAILED]"; fi + exit $err +fi + +# Determine selftest success via shell exit code +exit_handler() +{ set +e # Cleanup diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c index 1fc4e61e9f9f..1af37187dc12 100644 --- a/tools/testing/selftests/bpf/verifier/loops1.c +++ b/tools/testing/selftests/bpf/verifier/loops1.c @@ -187,3 +187,20 @@ .prog_type = BPF_PROG_TYPE_XDP, .retval = 55, }, +{ + "taken loop with back jump to 1st insn, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 55, +}, diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh new file mode 100644 index 000000000000..f7c168decd1e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../mirror_gre_scale.sh + +mirror_gre_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh new file mode 100755 index 000000000000..2b5f4f7cc905 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +current_test="" + +cleanup() +{ + pre_cleanup + if [ ! -z $current_test ]; then + ${current_test}_cleanup + fi +} + +trap cleanup EXIT + +ALL_TESTS="tc_flower mirror_gre" +for current_test in ${TESTS:-$ALL_TESTS}; do + source ${current_test}_scale.sh + + num_netifs_var=${current_test^^}_NUM_NETIFS + num_netifs=${!num_netifs_var:-$NUM_NETIFS} + + for should_fail in 0 1; do + RET=0 + target=$(${current_test}_get_target "$should_fail") + ${current_test}_setup_prepare + setup_wait $num_netifs + ${current_test}_test "$target" "$should_fail" + ${current_test}_cleanup + if [[ "$should_fail" -eq 0 ]]; then + log_test "'$current_test' $target" + else + log_test "'$current_test' overflow $target" + fi + done +done +current_test="" + +exit "$RET" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh new file mode 100644 index 000000000000..a0795227216e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_flower_scale.sh + +tc_flower_get_target() +{ + local should_fail=$1; shift + + # The driver associates a counter with each tc filter, which means the + # number of supported filters is bounded by the number of available + # counters. + # Currently, the driver supports 12K (12,288) flow counters and six of + # these are used for multicast routing. + local target=12282 + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh index 8d2186c7c62b..f7c168decd1e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh @@ -4,10 +4,13 @@ source ../mirror_gre_scale.sh mirror_gre_get_target() { local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) if ((! should_fail)); then - echo 3 + echo $target else - echo 4 + echo $((target + 1)) fi } diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 115837355eaf..ee89cd2f5bee 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,7 +3,9 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test params_test regions_test" +ALL_TESTS="fw_flash_test params_test regions_test reload_test \ + netns_reload_test resource_test dev_info_test \ + empty_reporter_test dummy_reporter_test" NUM_NETIFS=0 source $lib_dir/lib.sh @@ -142,6 +144,290 @@ regions_test() log_test "regions test" } +reload_test() +{ + RET=0 + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload" + + echo "y"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload to fail" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload not to fail" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after set not to fail" + + echo "y"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to forbid devlink reload" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to re-enable devlink reload" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after re-enable" + + log_test "reload test" +} + +netns_reload_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + log_test "netns reload test" +} + +DUMMYDEV="dummytest" + +res_val_get() +{ + local netns=$1 + local parentname=$2 + local name=$3 + local type=$4 + + cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \ + ".[][][] | select(.name == \"$parentname\").resources[] \ + | select(.name == \"$name\").$type" +} + +resource_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + # Create dummy dev to add the address and routes on. + + ip -n testns1 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns1 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + + local occ=$(res_val_get testns1 IPv4 fib occ) + local limit=$((occ+1)) + + # Set fib size limit to handle one another route only. + + devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit + check_err $? "Failed to set IPv4/fib resource size" + local size_new=$(res_val_get testns1 IPv4 fib size_new) + [ "$size_new" -eq "$limit" ] + check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)" + + devlink -N testns1 dev reload $DL_HANDLE + check_err $? "Failed to reload" + local size=$(res_val_get testns1 IPv4 fib size) + [ "$size" -eq "$limit" ] + check_err $? "Unexpected \"size\" value (got $size, expected $limit)" + + # Insert 2 routes, the first is going to be inserted, + # the second is expected to fail to be inserted. + + ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2 + check_fail $? "Unexpected successful route add over limit" + + # Now create another dummy in second network namespace and + # insert two routes. That is over the limit of the netdevsim + # instance in the first namespace. Move the netdevsim instance + # into the second namespace and expect it to fail. + + ip -n testns2 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns2 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + log_test "resource test" +} + +info_get() +{ + local name=$1 + + cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e" +} + +dev_info_test() +{ + RET=0 + + driver=$(info_get "driver") + check_err $? "Failed to get driver name" + [ "$driver" == "netdevsim" ] + check_err $? "Unexpected driver name $driver" + + log_test "dev_info test" +} + +empty_reporter_test() +{ + RET=0 + + devlink health show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show empty reporter" + + devlink health dump show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show dump of empty reporter" + + devlink health diagnose $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed diagnose empty reporter" + + devlink health recover $DL_HANDLE reporter empty + check_err $? "Failed recover empty reporter" + + log_test "empty reporter test" +} + +check_reporter_info() +{ + local name=$1 + local expected_state=$2 + local expected_error=$3 + local expected_recover=$4 + local expected_grace_period=$5 + local expected_auto_recover=$6 + + local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]") + check_err $? "Failed show $name reporter" + + local state=$(echo $show | jq -r ".state") + [ "$state" == "$expected_state" ] + check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)" + + local error=$(echo $show | jq -r ".error") + [ "$error" == "$expected_error" ] + check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)" + + local recover=`echo $show | jq -r ".recover"` + [ "$recover" == "$expected_recover" ] + check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)" + + local grace_period=$(echo $show | jq -r ".grace_period") + check_err $? "Failed get $name reporter grace_period" + [ "$grace_period" == "$expected_grace_period" ] + check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)" + + local auto_recover=$(echo $show | jq -r ".auto_recover") + [ "$auto_recover" == "$expected_auto_recover" ] + check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)" +} + +dummy_reporter_test() +{ + RET=0 + + check_reporter_info dummy healthy 0 0 0 false + + local BREAK_MSG="foo bar" + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy error 1 0 0 false + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + local dump_break_msg=$(echo $dump | jq -r ".break_message") + [ "$dump_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? "Failed clear dump of dummy reporter" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 1 1 0 false + + devlink health set $DL_HANDLE reporter dummy auto_recover true + check_err $? "Failed to dummy reporter auto_recover option" + + check_reporter_info dummy healthy 1 1 0 true + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy healthy 2 2 0 true + + local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p) + check_err $? "Failed show diagnose of dummy reporter" + + local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message") + [ "$rcvrd_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)" + + devlink health set $DL_HANDLE reporter dummy grace_period 10 + check_err $? "Failed to dummy reporter grace_period option" + + check_reporter_info dummy healthy 2 2 10 true + + echo "Y"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to fail" + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_fail $? "Unexpected success of dummy reporter break" + + check_reporter_info dummy error 3 2 10 true + + devlink health recover $DL_HANDLE reporter dummy + check_fail $? "Unexpected success of dummy reporter recover" + + echo "N"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to be successful" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 3 3 10 true + + log_test "dummy reporter test" +} + setup_prepare() { modprobe netdevsim diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh new file mode 100755 index 000000000000..7effd35369e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="check_devlink_test check_ports_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +BUS_ADDR=10 +PORT_COUNT=4 +DEV_NAME=netdevsim$BUS_ADDR +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ +DL_HANDLE=netdevsim/$DEV_NAME +NETNS_NAME=testns1 + +port_netdev_get() +{ + local port_index=$1 + + cmd_jq "devlink -N $NETNS_NAME port show -j" \ + ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e" +} + +check_ports_test() +{ + RET=0 + + for i in $(seq 0 $(expr $PORT_COUNT - 1)); do + netdev_name=$(port_netdev_get $i) + check_err $? "Failed to get netdev name for port $DL_HANDLE/$i" + ip -n $NETNS_NAME link show $netdev_name &> /dev/null + check_err $? "Failed to find netdev $netdev_name" + done + + log_test "check ports test" +} + +check_devlink_test() +{ + RET=0 + + devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null + check_err $? "Failed to show devlink instance" + + log_test "check devlink test" +} + +setup_prepare() +{ + modprobe netdevsim + ip netns add $NETNS_NAME + ip netns exec $NETNS_NAME \ + echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done +} + +cleanup() +{ + pre_cleanup + echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device + ip netns del $NETNS_NAME + modprobe -r netdevsim +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85c587a03c8a..8b48ec54d058 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -254,6 +254,7 @@ cmd_jq() { local cmd=$1 local jq_exp=$2 + local jq_opts=$3 local ret local output @@ -263,7 +264,11 @@ cmd_jq() if [[ $ret -ne 0 ]]; then return $ret fi - output=$(echo $output | jq -r "$jq_exp") + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi echo $output # return success only in case of non-empty output [ ! -z "$output" ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json index 0d319f1d01db..54934203274c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -349,6 +349,31 @@ ] }, { + "id": "a5a7", + "name": "Add pedit action with LAYERED_OP eth set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 00001122 mask ffff0000.*key #1 at eth\\+8: val 33445566 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "86d4", "name": "Add pedit action with LAYERED_OP eth set src & dst", "category": [ @@ -374,6 +399,31 @@ ] }, { + "id": "f8a9", + "name": "Add pedit action with LAYERED_OP eth set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set 11:22:33:44:55:66", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val 11223344 mask 00000000.*key #1 at eth\\+4: val 55660000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "c715", "name": "Add pedit action with LAYERED_OP eth set src (INVALID)", "category": [ @@ -399,6 +449,31 @@ ] }, { + "id": "8131", + "name": "Add pedit action with LAYERED_OP eth set dst (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set %e:11:m2:33:x4:-5", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "ba22", "name": "Add pedit action with LAYERED_OP eth type set/clear sequence", "category": [ @@ -424,6 +499,179 @@ ] }, { + "id": "dec4", + "name": "Add pedit action with LAYERED_OP eth set type (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0xabcdef", + "expExitCode": "255", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth+12: val ", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ab06", + "name": "Add pedit action with LAYERED_OP eth add type", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type add 0x1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: add 00010000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "918d", + "name": "Add pedit action with LAYERED_OP eth invert src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 0000ff00 mask ffff0000.*key #1 at eth\\+8: val 00000000 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "a8d4", + "name": "Add pedit action with LAYERED_OP eth invert dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val ff000000 mask 00000000.*key #1 at eth\\+4: val 00000000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "ee13", + "name": "Add pedit action with LAYERED_OP eth invert type", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: val ffff0000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7588", + "name": "Add pedit action with LAYERED_OP ip set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip src set 1.1.1.1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 12: val 01010101 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "0fa7", + "name": "Add pedit action with LAYERED_OP ip set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip dst set 2.2.2.2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 16: val 02020202 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "5810", "name": "Add pedit action with LAYERED_OP ip set src & dst", "category": [ @@ -674,6 +922,56 @@ ] }, { + "id": "815c", + "name": "Add pedit action with LAYERED_OP ip6 set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+8: val 20010db8 mask 00000000.*key #1 at ipv6\\+12: val 0000f101 mask 00000000.*key #2 at ipv6\\+16: val 00000000 mask 00000000.*key #3 at ipv6\\+20: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "4dae", + "name": "Add pedit action with LAYERED_OP ip6 set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+24: val 20010db8 mask 00000000.*key #1 at ipv6\\+28: val 0000f101 mask 00000000.*key #2 at ipv6\\+32: val 00000000 mask 00000000.*key #3 at ipv6\\+36: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "fc1f", "name": "Add pedit action with LAYERED_OP ip6 set src & dst", "category": [ @@ -950,5 +1248,4 @@ "$TC actions flush action pedit" ] } - ] |